1 /* Lowering pass for OMP directives. Converts OMP directives into explicit
2 calls to the runtime library (libgomp), data marshalling to implement data
3 sharing and copying clauses, offloading to accelerators, and more.
4
5 Contributed by Diego Novillo <dnovillo@redhat.com>
6
7 Copyright (C) 2005-2016 Free Software Foundation, Inc.
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it under
12 the terms of the GNU General Public License as published by the Free
13 Software Foundation; either version 3, or (at your option) any later
14 version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
17 WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "gimple.h"
33 #include "cfghooks.h"
34 #include "alloc-pool.h"
35 #include "tree-pass.h"
36 #include "ssa.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "emit-rtl.h"
40 #include "cgraph.h"
41 #include "pretty-print.h"
42 #include "diagnostic-core.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "stor-layout.h"
46 #include "cfganal.h"
47 #include "internal-fn.h"
48 #include "gimple-fold.h"
49 #include "gimplify.h"
50 #include "gimple-iterator.h"
51 #include "gimplify-me.h"
52 #include "gimple-walk.h"
53 #include "tree-iterator.h"
54 #include "tree-inline.h"
55 #include "langhooks.h"
56 #include "tree-cfg.h"
57 #include "tree-into-ssa.h"
58 #include "flags.h"
59 #include "dojump.h"
60 #include "explow.h"
61 #include "calls.h"
62 #include "varasm.h"
63 #include "stmt.h"
64 #include "expr.h"
65 #include "tree-dfa.h"
66 #include "tree-ssa.h"
67 #include "except.h"
68 #include "splay-tree.h"
69 #include "cfgloop.h"
70 #include "common/common-target.h"
71 #include "omp-low.h"
72 #include "gimple-low.h"
73 #include "tree-cfgcleanup.h"
74 #include "symbol-summary.h"
75 #include "ipa-prop.h"
76 #include "tree-nested.h"
77 #include "tree-eh.h"
78 #include "cilk.h"
79 #include "context.h"
80 #include "lto-section-names.h"
81 #include "gomp-constants.h"
82 #include "gimple-pretty-print.h"
83 #include "symbol-summary.h"
84 #include "hsa.h"
85 #include "params.h"
86
87 /* Lowering of OMP parallel and workshare constructs proceeds in two
88 phases. The first phase scans the function looking for OMP statements
89 and then for variables that must be replaced to satisfy data sharing
90 clauses. The second phase expands code for the constructs, as well as
91 re-gimplifying things when variables have been replaced with complex
92 expressions.
93
94 Final code generation is done by pass_expand_omp. The flowgraph is
95 scanned for regions which are then moved to a new
96 function, to be invoked by the thread library, or offloaded. */
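/* For illustration only -- a simplified sketch of the overall effect, not
   the exact GIMPLE produced.  A region such as

	#pragma omp parallel shared(x)
	  x = foo ();

   is outlined into a child function that receives the data-sharing block
   built during the first phase,

	void main._omp_fn.0 (struct .omp_data_s *.omp_data_i)
	{
	  *.omp_data_i->x = foo ();
	}

   while the original site is rewritten to marshal X and call the runtime,
   roughly

	.omp_data_o.x = &x;
	__builtin_GOMP_parallel (main._omp_fn.0, &.omp_data_o, 0, 0);

   The field layout and the exact call shown here are illustrative.  */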
97
98 /* OMP region information. Every parallel and workshare
99 directive is enclosed between two markers, the OMP_* directive
100 and a corresponding GIMPLE_OMP_RETURN statement. */
101
102 struct omp_region
103 {
104 /* The enclosing region. */
105 struct omp_region *outer;
106
107 /* First child region. */
108 struct omp_region *inner;
109
110 /* Next peer region. */
111 struct omp_region *next;
112
113 /* Block containing the omp directive as its last stmt. */
114 basic_block entry;
115
116 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
117 basic_block exit;
118
119 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
120 basic_block cont;
121
122 /* If this is a combined parallel+workshare region, this is a list
123 of additional arguments needed by the combined parallel+workshare
124 library call. */
125 vec<tree, va_gc> *ws_args;
126
127 /* The code for the omp directive of this region. */
128 enum gimple_code type;
129
130 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
131 enum omp_clause_schedule_kind sched_kind;
132
133 /* Schedule modifiers. */
134 unsigned char sched_modifiers;
135
136 /* True if this is a combined parallel+workshare region. */
137 bool is_combined_parallel;
138
139 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
140 a depend clause. */
141 gomp_ordered *ord_stmt;
142 };
143
144 /* Context structure. Used to store information about each parallel
145 directive in the code. */
146
147 struct omp_context
148 {
149 /* This field must be at the beginning, as we do "inheritance": Some
150 callback functions for tree-inline.c (e.g., omp_copy_decl)
151 receive a copy_body_data pointer that is up-casted to an
152 omp_context pointer. */
153 copy_body_data cb;
154
155 /* The tree of contexts corresponding to the encountered constructs. */
156 struct omp_context *outer;
157 gimple *stmt;
158
159 /* Map variables to fields in a structure that allows communication
160 between sending and receiving threads. */
161 splay_tree field_map;
162 tree record_type;
163 tree sender_decl;
164 tree receiver_decl;
165
166 /* These are used just by task contexts, if task firstprivate fn is
167 needed. srecord_type is used to communicate from the thread
168 that encountered the task construct to task firstprivate fn;
169 record_type is allocated by GOMP_task, initialized by task firstprivate
170 fn and passed to the task body fn. */
171 splay_tree sfield_map;
172 tree srecord_type;
173
174 /* A chain of variables to add to the top-level block surrounding the
175 construct. In the case of a parallel, this is in the child function. */
176 tree block_vars;
177
178 /* Label to which GOMP_cancel{,lation_point} and explicit and implicit
179 barriers should jump during the omplower pass. */
180 tree cancel_label;
181
182 /* What to do with variables with implicitly determined sharing
183 attributes. */
184 enum omp_clause_default_kind default_kind;
185
186 /* Nesting depth of this context. Used to beautify error messages about
187 invalid gotos. The outermost ctx is depth 1, with depth 0 being
188 reserved for the main body of the function. */
189 int depth;
190
191 /* True if this parallel directive is nested within another. */
192 bool is_nested;
193
194 /* True if this construct can be cancelled. */
195 bool cancellable;
196 };
197
198 /* A structure holding the elements of:
199 for (V = N1; V cond N2; V += STEP) [...] */
200
201 struct omp_for_data_loop
202 {
203 tree v, n1, n2, step;
204 enum tree_code cond_code;
205 };
206
207 /* A structure describing the main elements of a parallel loop. */
208
209 struct omp_for_data
210 {
211 struct omp_for_data_loop loop;
212 tree chunk_size;
213 gomp_for *for_stmt;
214 tree pre, iter_type;
215 int collapse;
216 int ordered;
217 bool have_nowait, have_ordered, simd_schedule;
218 unsigned char sched_modifiers;
219 enum omp_clause_schedule_kind sched_kind;
220 struct omp_for_data_loop *loops;
221 };
222
223 /* Describe the OpenACC looping structure of a function. The entire
224 function is held in a 'NULL' loop. */
225
226 struct oacc_loop
227 {
228 oacc_loop *parent; /* Containing loop. */
229
230 oacc_loop *child; /* First inner loop. */
231
232 oacc_loop *sibling; /* Next loop within same parent. */
233
234 location_t loc; /* Location of the loop start. */
235
236 gcall *marker; /* Initial head marker. */
237
238 gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */
239 gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */
240
241 tree routine; /* Pseudo-loop enclosing a routine. */
242
243 unsigned mask; /* Partitioning mask. */
244 unsigned flags; /* Partitioning flags. */
245 tree chunk_size; /* Chunk size. */
246 gcall *head_end; /* Final marker of head sequence. */
247 };
248
249 /* Flags for an OpenACC loop. */
250
251 enum oacc_loop_flags {
252 OLF_SEQ = 1u << 0, /* Explicitly sequential */
253 OLF_AUTO = 1u << 1, /* Compiler chooses axes. */
254 OLF_INDEPENDENT = 1u << 2, /* Iterations are known independent. */
255 OLF_GANG_STATIC = 1u << 3, /* Gang partitioning is static (has op). */
256
257 /* Explicitly specified loop axes. */
258 OLF_DIM_BASE = 4,
259 OLF_DIM_GANG = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG),
260 OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER),
261 OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR),
262
263 OLF_MAX = OLF_DIM_BASE + GOMP_DIM_MAX
264 };
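/* For illustration (a reading of the flags above, not text from the
   original sources): a loop written '#pragma acc loop gang vector' is
   summarized as OLF_DIM_GANG | OLF_DIM_VECTOR, an explicit
   '#pragma acc loop seq' sets OLF_SEQ, and 'gang(static:N)' additionally
   sets OLF_GANG_STATIC with N recorded as the loop's chunk size.  */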
265
266
267 static splay_tree all_contexts;
268 static int taskreg_nesting_level;
269 static int target_nesting_level;
270 static struct omp_region *root_omp_region;
271 static bitmap task_shared_vars;
272 static vec<omp_context *> taskreg_contexts;
273 static bool omp_any_child_fn_dumped;
274
275 static void scan_omp (gimple_seq *, omp_context *);
276 static tree scan_omp_1_op (tree *, int *, void *);
277 static gphi *find_phi_with_arg_on_edge (tree, edge);
278
279 #define WALK_SUBSTMTS \
280 case GIMPLE_BIND: \
281 case GIMPLE_TRY: \
282 case GIMPLE_CATCH: \
283 case GIMPLE_EH_FILTER: \
284 case GIMPLE_TRANSACTION: \
285 /* The sub-statements for these should be walked. */ \
286 *handled_ops_p = false; \
287 break;
288
289 /* Return true if CTX corresponds to an oacc parallel region. */
290
291 static bool
292 is_oacc_parallel (omp_context *ctx)
293 {
294 enum gimple_code outer_type = gimple_code (ctx->stmt);
295 return ((outer_type == GIMPLE_OMP_TARGET)
296 && (gimple_omp_target_kind (ctx->stmt)
297 == GF_OMP_TARGET_KIND_OACC_PARALLEL));
298 }
299
300 /* Return true if CTX corresponds to an oacc kernels region. */
301
302 static bool
303 is_oacc_kernels (omp_context *ctx)
304 {
305 enum gimple_code outer_type = gimple_code (ctx->stmt);
306 return ((outer_type == GIMPLE_OMP_TARGET)
307 && (gimple_omp_target_kind (ctx->stmt)
308 == GF_OMP_TARGET_KIND_OACC_KERNELS));
309 }
310
311 /* If DECL is the artificial dummy VAR_DECL created for non-static
312 data member privatization, return the underlying "this" parameter,
313 otherwise return NULL. */
314
315 tree
316 omp_member_access_dummy_var (tree decl)
317 {
318 if (!VAR_P (decl)
319 || !DECL_ARTIFICIAL (decl)
320 || !DECL_IGNORED_P (decl)
321 || !DECL_HAS_VALUE_EXPR_P (decl)
322 || !lang_hooks.decls.omp_disregard_value_expr (decl, false))
323 return NULL_TREE;
324
325 tree v = DECL_VALUE_EXPR (decl);
326 if (TREE_CODE (v) != COMPONENT_REF)
327 return NULL_TREE;
328
329 while (1)
330 switch (TREE_CODE (v))
331 {
332 case COMPONENT_REF:
333 case MEM_REF:
334 case INDIRECT_REF:
335 CASE_CONVERT:
336 case POINTER_PLUS_EXPR:
337 v = TREE_OPERAND (v, 0);
338 continue;
339 case PARM_DECL:
340 if (DECL_CONTEXT (v) == current_function_decl
341 && DECL_ARTIFICIAL (v)
342 && TREE_CODE (TREE_TYPE (v)) == POINTER_TYPE)
343 return v;
344 return NULL_TREE;
345 default:
346 return NULL_TREE;
347 }
348 }
349
350 /* Helper for unshare_and_remap, called through walk_tree. */
351
352 static tree
353 unshare_and_remap_1 (tree *tp, int *walk_subtrees, void *data)
354 {
355 tree *pair = (tree *) data;
356 if (*tp == pair[0])
357 {
358 *tp = unshare_expr (pair[1]);
359 *walk_subtrees = 0;
360 }
361 else if (IS_TYPE_OR_DECL_P (*tp))
362 *walk_subtrees = 0;
363 return NULL_TREE;
364 }
365
366 /* Return unshare_expr (X) with all occurrences of FROM
367 replaced with TO. */
368
369 static tree
370 unshare_and_remap (tree x, tree from, tree to)
371 {
372 tree pair[2] = { from, to };
373 x = unshare_expr (x);
374 walk_tree (&x, unshare_and_remap_1, pair, NULL);
375 return x;
376 }
377
378 /* Holds offload tables with decls. */
379 vec<tree, va_gc> *offload_funcs, *offload_vars;
380
381 /* Convenience function for calling scan_omp_1_op on tree operands. */
382
383 static inline tree
384 scan_omp_op (tree *tp, omp_context *ctx)
385 {
386 struct walk_stmt_info wi;
387
388 memset (&wi, 0, sizeof (wi));
389 wi.info = ctx;
390 wi.want_locations = true;
391
392 return walk_tree (tp, scan_omp_1_op, &wi, NULL);
393 }
394
395 static void lower_omp (gimple_seq *, omp_context *);
396 static tree lookup_decl_in_outer_ctx (tree, omp_context *);
397 static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *);
398
399 /* Find an OMP clause of type KIND within CLAUSES. */
400
401 tree
402 find_omp_clause (tree clauses, enum omp_clause_code kind)
403 {
404 for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
405 if (OMP_CLAUSE_CODE (clauses) == kind)
406 return clauses;
407
408 return NULL_TREE;
409 }
410
411 /* Return true if CTX is for an omp parallel. */
412
413 static inline bool
414 is_parallel_ctx (omp_context *ctx)
415 {
416 return gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL;
417 }
418
419
420 /* Return true if CTX is for an omp task. */
421
422 static inline bool
423 is_task_ctx (omp_context *ctx)
424 {
425 return gimple_code (ctx->stmt) == GIMPLE_OMP_TASK;
426 }
427
428
429 /* Return true if CTX is for an omp taskloop. */
430
431 static inline bool
432 is_taskloop_ctx (omp_context *ctx)
433 {
434 return gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
435 && gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_TASKLOOP;
436 }
437
438
439 /* Return true if CTX is for an omp parallel or omp task. */
440
441 static inline bool
442 is_taskreg_ctx (omp_context *ctx)
443 {
444 return is_parallel_ctx (ctx) || is_task_ctx (ctx);
445 }
446
447
448 /* Return true if REGION is a combined parallel+workshare region. */
449
450 static inline bool
451 is_combined_parallel (struct omp_region *region)
452 {
453 return region->is_combined_parallel;
454 }
455
456 /* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or
457 GT_EXPR. */
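/* For example (illustrative): 'for (i = 0; i <= n; i++)' arrives here
   with LE_EXPR and is normalized to 'i < n + 1', while
   'for (i = n; i >= 0; i--)' becomes 'i > -1', i.e. GE_EXPR is turned
   into GT_EXPR with *N2 decremented by one.  */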
458
459 static void
460 adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2)
461 {
462 switch (*cond_code)
463 {
464 case LT_EXPR:
465 case GT_EXPR:
466 case NE_EXPR:
467 break;
468 case LE_EXPR:
469 if (POINTER_TYPE_P (TREE_TYPE (*n2)))
470 *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1);
471 else
472 *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2,
473 build_int_cst (TREE_TYPE (*n2), 1));
474 *cond_code = LT_EXPR;
475 break;
476 case GE_EXPR:
477 if (POINTER_TYPE_P (TREE_TYPE (*n2)))
478 *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1);
479 else
480 *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2,
481 build_int_cst (TREE_TYPE (*n2), 1));
482 *cond_code = GT_EXPR;
483 break;
484 default:
485 gcc_unreachable ();
486 }
487 }
488
489 /* Return the looping step from INCR, extracted from the step of a gimple omp
490 for statement. */
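/* E.g. (illustrative): for an increment 'i = i + 4' the step is 4, for
   'i = i - 2' the operand is negated to give -2, and for a pointer
   induction variable the POINTER_PLUS_EXPR offset is first converted
   to ssizetype.  */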
491
492 static tree
493 get_omp_for_step_from_incr (location_t loc, tree incr)
494 {
495 tree step;
496 switch (TREE_CODE (incr))
497 {
498 case PLUS_EXPR:
499 step = TREE_OPERAND (incr, 1);
500 break;
501 case POINTER_PLUS_EXPR:
502 step = fold_convert (ssizetype, TREE_OPERAND (incr, 1));
503 break;
504 case MINUS_EXPR:
505 step = TREE_OPERAND (incr, 1);
506 step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step);
507 break;
508 default:
509 gcc_unreachable ();
510 }
511 return step;
512 }
513
514 /* Extract the header elements of parallel loop FOR_STMT and store
515 them into *FD. */
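/* As a minimal illustration (an assumed shape, not verbatim compiler
   output): for '#pragma omp for' over 'for (i = 0; i < n; i += 4)' this
   records roughly fd->loop.v = i, fd->loop.n1 = 0, fd->loop.n2 = n,
   fd->loop.step = 4, fd->loop.cond_code = LT_EXPR and fd->collapse = 1,
   with the schedule defaulting to OMP_CLAUSE_SCHEDULE_STATIC.  */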
516
517 static void
518 extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
519 struct omp_for_data_loop *loops)
520 {
521 tree t, var, *collapse_iter, *collapse_count;
522 tree count = NULL_TREE, iter_type = long_integer_type_node;
523 struct omp_for_data_loop *loop;
524 int i;
525 struct omp_for_data_loop dummy_loop;
526 location_t loc = gimple_location (for_stmt);
527 bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_SIMD;
528 bool distribute = gimple_omp_for_kind (for_stmt)
529 == GF_OMP_FOR_KIND_DISTRIBUTE;
530 bool taskloop = gimple_omp_for_kind (for_stmt)
531 == GF_OMP_FOR_KIND_TASKLOOP;
532 tree iterv, countv;
533
534 fd->for_stmt = for_stmt;
535 fd->pre = NULL;
536 if (gimple_omp_for_collapse (for_stmt) > 1)
537 fd->loops = loops;
538 else
539 fd->loops = &fd->loop;
540
541 fd->have_nowait = distribute || simd;
542 fd->have_ordered = false;
543 fd->collapse = 1;
544 fd->ordered = 0;
545 fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
546 fd->sched_modifiers = 0;
547 fd->chunk_size = NULL_TREE;
548 fd->simd_schedule = false;
549 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
550 fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR;
551 collapse_iter = NULL;
552 collapse_count = NULL;
553
554 for (t = gimple_omp_for_clauses (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
555 switch (OMP_CLAUSE_CODE (t))
556 {
557 case OMP_CLAUSE_NOWAIT:
558 fd->have_nowait = true;
559 break;
560 case OMP_CLAUSE_ORDERED:
561 fd->have_ordered = true;
562 if (OMP_CLAUSE_ORDERED_EXPR (t))
563 fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t));
564 break;
565 case OMP_CLAUSE_SCHEDULE:
566 gcc_assert (!distribute && !taskloop);
567 fd->sched_kind
568 = (enum omp_clause_schedule_kind)
569 (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK);
570 fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t)
571 & ~OMP_CLAUSE_SCHEDULE_MASK);
572 fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
573 fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t);
574 break;
575 case OMP_CLAUSE_DIST_SCHEDULE:
576 gcc_assert (distribute);
577 fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t);
578 break;
579 case OMP_CLAUSE_COLLAPSE:
580 fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t));
581 if (fd->collapse > 1)
582 {
583 collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t);
584 collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t);
585 }
586 break;
587 default:
588 break;
589 }
590 if (fd->ordered && fd->collapse == 1 && loops != NULL)
591 {
592 fd->loops = loops;
593 iterv = NULL_TREE;
594 countv = NULL_TREE;
595 collapse_iter = &iterv;
596 collapse_count = &countv;
597 }
598
599 /* FIXME: for now map schedule(auto) to schedule(static).
600 There should be analysis to determine whether all iterations
601 are approximately the same amount of work (then schedule(static)
602 is best) or if it varies (then schedule(dynamic,N) is better). */
603 if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO)
604 {
605 fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
606 gcc_assert (fd->chunk_size == NULL);
607 }
608 gcc_assert (fd->collapse == 1 || collapse_iter != NULL);
609 if (taskloop)
610 fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME;
611 if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
612 gcc_assert (fd->chunk_size == NULL);
613 else if (fd->chunk_size == NULL)
614 {
615 /* We only need to compute a default chunk size for ordered
616 static loops and dynamic loops. */
617 if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
618 || fd->have_ordered)
619 fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
620 ? integer_zero_node : integer_one_node;
621 }
622
623 int cnt = fd->ordered ? fd->ordered : fd->collapse;
624 for (i = 0; i < cnt; i++)
625 {
626 if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL))
627 loop = &fd->loop;
628 else if (loops != NULL)
629 loop = loops + i;
630 else
631 loop = &dummy_loop;
632
633 loop->v = gimple_omp_for_index (for_stmt, i);
634 gcc_assert (SSA_VAR_P (loop->v));
635 gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
636 || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE);
637 var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v;
638 loop->n1 = gimple_omp_for_initial (for_stmt, i);
639
640 loop->cond_code = gimple_omp_for_cond (for_stmt, i);
641 loop->n2 = gimple_omp_for_final (for_stmt, i);
642 gcc_assert (loop->cond_code != NE_EXPR
643 || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKSIMD
644 || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKFOR);
645 adjust_for_condition (loc, &loop->cond_code, &loop->n2);
646
647 t = gimple_omp_for_incr (for_stmt, i);
648 gcc_assert (TREE_OPERAND (t, 0) == var);
649 loop->step = get_omp_for_step_from_incr (loc, t);
650
651 if (simd
652 || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
653 && !fd->have_ordered))
654 {
655 if (fd->collapse == 1)
656 iter_type = TREE_TYPE (loop->v);
657 else if (i == 0
658 || TYPE_PRECISION (iter_type)
659 < TYPE_PRECISION (TREE_TYPE (loop->v)))
660 iter_type
661 = build_nonstandard_integer_type
662 (TYPE_PRECISION (TREE_TYPE (loop->v)), 1);
663 }
664 else if (iter_type != long_long_unsigned_type_node)
665 {
666 if (POINTER_TYPE_P (TREE_TYPE (loop->v)))
667 iter_type = long_long_unsigned_type_node;
668 else if (TYPE_UNSIGNED (TREE_TYPE (loop->v))
669 && TYPE_PRECISION (TREE_TYPE (loop->v))
670 >= TYPE_PRECISION (iter_type))
671 {
672 tree n;
673
674 if (loop->cond_code == LT_EXPR)
675 n = fold_build2_loc (loc,
676 PLUS_EXPR, TREE_TYPE (loop->v),
677 loop->n2, loop->step);
678 else
679 n = loop->n1;
680 if (TREE_CODE (n) != INTEGER_CST
681 || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n))
682 iter_type = long_long_unsigned_type_node;
683 }
684 else if (TYPE_PRECISION (TREE_TYPE (loop->v))
685 > TYPE_PRECISION (iter_type))
686 {
687 tree n1, n2;
688
689 if (loop->cond_code == LT_EXPR)
690 {
691 n1 = loop->n1;
692 n2 = fold_build2_loc (loc,
693 PLUS_EXPR, TREE_TYPE (loop->v),
694 loop->n2, loop->step);
695 }
696 else
697 {
698 n1 = fold_build2_loc (loc,
699 MINUS_EXPR, TREE_TYPE (loop->v),
700 loop->n2, loop->step);
701 n2 = loop->n1;
702 }
703 if (TREE_CODE (n1) != INTEGER_CST
704 || TREE_CODE (n2) != INTEGER_CST
705 || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1)
706 || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type)))
707 iter_type = long_long_unsigned_type_node;
708 }
709 }
710
711 if (i >= fd->collapse)
712 continue;
713
714 if (collapse_count && *collapse_count == NULL)
715 {
716 t = fold_binary (loop->cond_code, boolean_type_node,
717 fold_convert (TREE_TYPE (loop->v), loop->n1),
718 fold_convert (TREE_TYPE (loop->v), loop->n2));
719 if (t && integer_zerop (t))
720 count = build_zero_cst (long_long_unsigned_type_node);
721 else if ((i == 0 || count != NULL_TREE)
722 && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
723 && TREE_CONSTANT (loop->n1)
724 && TREE_CONSTANT (loop->n2)
725 && TREE_CODE (loop->step) == INTEGER_CST)
726 {
727 tree itype = TREE_TYPE (loop->v);
728
729 if (POINTER_TYPE_P (itype))
730 itype = signed_type_for (itype);
731 t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1));
732 t = fold_build2_loc (loc,
733 PLUS_EXPR, itype,
734 fold_convert_loc (loc, itype, loop->step), t);
735 t = fold_build2_loc (loc, PLUS_EXPR, itype, t,
736 fold_convert_loc (loc, itype, loop->n2));
737 t = fold_build2_loc (loc, MINUS_EXPR, itype, t,
738 fold_convert_loc (loc, itype, loop->n1));
739 if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR)
740 t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype,
741 fold_build1_loc (loc, NEGATE_EXPR, itype, t),
742 fold_build1_loc (loc, NEGATE_EXPR, itype,
743 fold_convert_loc (loc, itype,
744 loop->step)));
745 else
746 t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t,
747 fold_convert_loc (loc, itype, loop->step));
748 t = fold_convert_loc (loc, long_long_unsigned_type_node, t);
749 if (count != NULL_TREE)
750 count = fold_build2_loc (loc,
751 MULT_EXPR, long_long_unsigned_type_node,
752 count, t);
753 else
754 count = t;
755 if (TREE_CODE (count) != INTEGER_CST)
756 count = NULL_TREE;
757 }
758 else if (count && !integer_zerop (count))
759 count = NULL_TREE;
760 }
761 }
762
763 if (count
764 && !simd
765 && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
766 || fd->have_ordered))
767 {
768 if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node)))
769 iter_type = long_long_unsigned_type_node;
770 else
771 iter_type = long_integer_type_node;
772 }
773 else if (collapse_iter && *collapse_iter != NULL)
774 iter_type = TREE_TYPE (*collapse_iter);
775 fd->iter_type = iter_type;
776 if (collapse_iter && *collapse_iter == NULL)
777 *collapse_iter = create_tmp_var (iter_type, ".iter");
778 if (collapse_count && *collapse_count == NULL)
779 {
780 if (count)
781 *collapse_count = fold_convert_loc (loc, iter_type, count);
782 else
783 *collapse_count = create_tmp_var (iter_type, ".count");
784 }
785
786 if (fd->collapse > 1 || (fd->ordered && loops))
787 {
788 fd->loop.v = *collapse_iter;
789 fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
790 fd->loop.n2 = *collapse_count;
791 fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
792 fd->loop.cond_code = LT_EXPR;
793 }
794 else if (loops)
795 loops[0] = fd->loop;
796 }
797
798
799 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that PAR_ENTRY_BB
800 is the immediate dominator of WS_ENTRY_BB, return true if there
801 are no data dependencies that would prevent expanding the parallel
802 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
803
804 When expanding a combined parallel+workshare region, the call to
805 the child function may need additional arguments in the case of
806 GIMPLE_OMP_FOR regions. In some cases, these arguments are
807 computed out of variables passed in from the parent to the child
808 via 'struct .omp_data_s'. For instance:
809
810 #pragma omp parallel for schedule (guided, i * 4)
811 for (j ...)
812
813 Is lowered into:
814
815 # BLOCK 2 (PAR_ENTRY_BB)
816 .omp_data_o.i = i;
817 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
818
819 # BLOCK 3 (WS_ENTRY_BB)
820 .omp_data_i = &.omp_data_o;
821 D.1667 = .omp_data_i->i;
822 D.1598 = D.1667 * 4;
823 #pragma omp for schedule (guided, D.1598)
824
825 When we outline the parallel region, the call to the child function
826 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
827 that value is computed *after* the call site. So, in principle we
828 cannot do the transformation.
829
830 To see whether the code in WS_ENTRY_BB blocks the combined
831 parallel+workshare call, we collect all the variables used in the
832 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
833 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
834 call.
835
836 FIXME. If we had the SSA form built at this point, we could merely
837 hoist the code in block 3 into block 2 and be done with it. But at
838 this point we don't have dataflow information and though we could
839 hack something up here, it is really not worth the aggravation. */
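/* When the region is safe to combine, the expander can use a single
   combined libgomp entry point instead of GOMP_parallel plus a separate
   loop-start call inside the child; schematically (an illustrative
   sketch, not the exact call emitted)

	GOMP_parallel_loop_dynamic (fn, &.omp_data_o, 0, n1, n2, step,
				    chunk, 0);

   where n1/n2/step/chunk are the extra WS_ARGS collected by
   get_ws_args_for below.  */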
840
841 static bool
842 workshare_safe_to_combine_p (basic_block ws_entry_bb)
843 {
844 struct omp_for_data fd;
845 gimple *ws_stmt = last_stmt (ws_entry_bb);
846
847 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
848 return true;
849
850 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
851
852 extract_omp_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
853
854 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
855 return false;
856 if (fd.iter_type != long_integer_type_node)
857 return false;
858
859 /* FIXME. We give up too easily here. If any of these arguments
860 are not constants, they will likely involve variables that have
861 been mapped into fields of .omp_data_s for sharing with the child
862 function. With appropriate data flow, it would be possible to
863 see through this. */
864 if (!is_gimple_min_invariant (fd.loop.n1)
865 || !is_gimple_min_invariant (fd.loop.n2)
866 || !is_gimple_min_invariant (fd.loop.step)
867 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
868 return false;
869
870 return true;
871 }
872
873
874 static int omp_max_vf (void);
875
876 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
877 presence (SIMD_SCHEDULE). */
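/* For instance (illustrative): with a maximum vectorization factor of 8,
   a requested chunk size of 10 becomes (10 + 7) & -8 == 16, so every
   chunk handed out is a multiple of the SIMD width.  */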
878
879 static tree
880 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
881 {
882 if (!simd_schedule)
883 return chunk_size;
884
885 int vf = omp_max_vf ();
886 if (vf == 1)
887 return chunk_size;
888
889 tree type = TREE_TYPE (chunk_size);
890 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
891 build_int_cst (type, vf - 1));
892 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
893 build_int_cst (type, -vf));
894 }
895
896
897 /* Collect additional arguments needed to emit a combined
898 parallel+workshare call. WS_STMT is the workshare directive being
899 expanded. */
900
901 static vec<tree, va_gc> *
902 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
903 {
904 tree t;
905 location_t loc = gimple_location (ws_stmt);
906 vec<tree, va_gc> *ws_args;
907
908 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
909 {
910 struct omp_for_data fd;
911 tree n1, n2;
912
913 extract_omp_for_data (for_stmt, &fd, NULL);
914 n1 = fd.loop.n1;
915 n2 = fd.loop.n2;
916
917 if (gimple_omp_for_combined_into_p (for_stmt))
918 {
919 tree innerc
920 = find_omp_clause (gimple_omp_parallel_clauses (par_stmt),
921 OMP_CLAUSE__LOOPTEMP_);
922 gcc_assert (innerc);
923 n1 = OMP_CLAUSE_DECL (innerc);
924 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
925 OMP_CLAUSE__LOOPTEMP_);
926 gcc_assert (innerc);
927 n2 = OMP_CLAUSE_DECL (innerc);
928 }
929
930 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
931
932 t = fold_convert_loc (loc, long_integer_type_node, n1);
933 ws_args->quick_push (t);
934
935 t = fold_convert_loc (loc, long_integer_type_node, n2);
936 ws_args->quick_push (t);
937
938 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
939 ws_args->quick_push (t);
940
941 if (fd.chunk_size)
942 {
943 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
944 t = omp_adjust_chunk_size (t, fd.simd_schedule);
945 ws_args->quick_push (t);
946 }
947
948 return ws_args;
949 }
950 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
951 {
952 /* Number of sections is equal to the number of edges from the
953 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
954 the exit of the sections region. */
955 basic_block bb = single_succ (gimple_bb (ws_stmt));
956 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
957 vec_alloc (ws_args, 1);
958 ws_args->quick_push (t);
959 return ws_args;
960 }
961
962 gcc_unreachable ();
963 }
964
965
966 /* Discover whether REGION is a combined parallel+workshare region. */
967
968 static void
969 determine_parallel_type (struct omp_region *region)
970 {
971 basic_block par_entry_bb, par_exit_bb;
972 basic_block ws_entry_bb, ws_exit_bb;
973
974 if (region == NULL || region->inner == NULL
975 || region->exit == NULL || region->inner->exit == NULL
976 || region->inner->cont == NULL)
977 return;
978
979 /* We only support parallel+for and parallel+sections. */
980 if (region->type != GIMPLE_OMP_PARALLEL
981 || (region->inner->type != GIMPLE_OMP_FOR
982 && region->inner->type != GIMPLE_OMP_SECTIONS))
983 return;
984
985 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
986 WS_EXIT_BB -> PAR_EXIT_BB. */
987 par_entry_bb = region->entry;
988 par_exit_bb = region->exit;
989 ws_entry_bb = region->inner->entry;
990 ws_exit_bb = region->inner->exit;
991
992 if (single_succ (par_entry_bb) == ws_entry_bb
993 && single_succ (ws_exit_bb) == par_exit_bb
994 && workshare_safe_to_combine_p (ws_entry_bb)
995 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
996 || (last_and_only_stmt (ws_entry_bb)
997 && last_and_only_stmt (par_exit_bb))))
998 {
999 gimple *par_stmt = last_stmt (par_entry_bb);
1000 gimple *ws_stmt = last_stmt (ws_entry_bb);
1001
1002 if (region->inner->type == GIMPLE_OMP_FOR)
1003 {
1004 /* If this is a combined parallel loop, we need to determine
1005 whether or not to use the combined library calls. There
1006 are two cases where we do not apply the transformation:
1007 static loops and any kind of ordered loop. In the first
1008 case, we already open code the loop so there is no need
1009 to do anything else. In the latter case, the combined
1010 parallel loop call would still need extra synchronization
1011 to implement ordered semantics, so there would not be any
1012 gain in using the combined call. */
1013 tree clauses = gimple_omp_for_clauses (ws_stmt);
1014 tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE);
1015 if (c == NULL
1016 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
1017 == OMP_CLAUSE_SCHEDULE_STATIC)
1018 || find_omp_clause (clauses, OMP_CLAUSE_ORDERED))
1019 {
1020 region->is_combined_parallel = false;
1021 region->inner->is_combined_parallel = false;
1022 return;
1023 }
1024 }
1025
1026 region->is_combined_parallel = true;
1027 region->inner->is_combined_parallel = true;
1028 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
1029 }
1030 }
1031
1032
1033 /* Return true if EXPR is variable sized. */
1034
1035 static inline bool
1036 is_variable_sized (const_tree expr)
1037 {
1038 return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1039 }
1040
1041 /* Return true if DECL has a reference type. */
1042
1043 static inline bool
1044 is_reference (tree decl)
1045 {
1046 return lang_hooks.decls.omp_privatize_by_reference (decl);
1047 }
1048
1049 /* Return the type of a decl. If the decl is reference type,
1050 return its base type. */
1051 static inline tree
1052 get_base_type (tree decl)
1053 {
1054 tree type = TREE_TYPE (decl);
1055 if (is_reference (decl))
1056 type = TREE_TYPE (type);
1057 return type;
1058 }
1059
1060 /* Lookup variables. The "maybe" form
1061 allows the variable not to have been entered; the plain form
1062 asserts that the variable has been entered. */
1063
1064 static inline tree
1065 lookup_decl (tree var, omp_context *ctx)
1066 {
1067 tree *n = ctx->cb.decl_map->get (var);
1068 return *n;
1069 }
1070
1071 static inline tree
1072 maybe_lookup_decl (const_tree var, omp_context *ctx)
1073 {
1074 tree *n = ctx->cb.decl_map->get (const_cast<tree> (var));
1075 return n ? *n : NULL_TREE;
1076 }
1077
1078 static inline tree
1079 lookup_field (tree var, omp_context *ctx)
1080 {
1081 splay_tree_node n;
1082 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) var);
1083 return (tree) n->value;
1084 }
1085
1086 static inline tree
1087 lookup_sfield (splay_tree_key key, omp_context *ctx)
1088 {
1089 splay_tree_node n;
1090 n = splay_tree_lookup (ctx->sfield_map
1091 ? ctx->sfield_map : ctx->field_map, key);
1092 return (tree) n->value;
1093 }
1094
1095 static inline tree
1096 lookup_sfield (tree var, omp_context *ctx)
1097 {
1098 return lookup_sfield ((splay_tree_key) var, ctx);
1099 }
1100
1101 static inline tree
1102 maybe_lookup_field (splay_tree_key key, omp_context *ctx)
1103 {
1104 splay_tree_node n;
1105 n = splay_tree_lookup (ctx->field_map, key);
1106 return n ? (tree) n->value : NULL_TREE;
1107 }
1108
1109 static inline tree
1110 maybe_lookup_field (tree var, omp_context *ctx)
1111 {
1112 return maybe_lookup_field ((splay_tree_key) var, ctx);
1113 }
1114
1115 /* Return true if DECL should be copied by pointer. SHARED_CTX is
1116 the parallel context if DECL is to be shared. */
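/* Illustrative summary of the decision below (not an exhaustive rule):
   an aggregate or an address-taken local is passed by pointer; a
   read-only scalar shared in a parallel is merely copied in by value;
   and shared variables in a task are always passed by pointer, because
   the task may still be running after GOMP_task returns.  */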
1117
1118 static bool
1119 use_pointer_for_field (tree decl, omp_context *shared_ctx)
1120 {
1121 if (AGGREGATE_TYPE_P (TREE_TYPE (decl)))
1122 return true;
1123
1124 /* We can only use copy-in/copy-out semantics for shared variables
1125 when we know the value is not accessible from an outer scope. */
1126 if (shared_ctx)
1127 {
1128 gcc_assert (!is_gimple_omp_oacc (shared_ctx->stmt));
1129
1130 /* ??? Trivially accessible from anywhere. But why would we even
1131 be passing an address in this case? Should we simply assert
1132 this to be false, or should we have a cleanup pass that removes
1133 these from the list of mappings? */
1134 if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
1135 return true;
1136
1137 /* For variables with DECL_HAS_VALUE_EXPR_P set, we cannot tell
1138 without analyzing the expression whether or not its location
1139 is accessible to anyone else. In the case of nested parallel
1140 regions it certainly may be. */
1141 if (TREE_CODE (decl) != RESULT_DECL && DECL_HAS_VALUE_EXPR_P (decl))
1142 return true;
1143
1144 /* Do not use copy-in/copy-out for variables that have their
1145 address taken. */
1146 if (TREE_ADDRESSABLE (decl))
1147 return true;
1148
1149 /* lower_send_shared_vars only uses copy-in, but not copy-out
1150 for these. */
1151 if (TREE_READONLY (decl)
1152 || ((TREE_CODE (decl) == RESULT_DECL
1153 || TREE_CODE (decl) == PARM_DECL)
1154 && DECL_BY_REFERENCE (decl)))
1155 return false;
1156
1157 /* Disallow copy-in/out in nested parallel if
1158 decl is shared in outer parallel, otherwise
1159 each thread could store the shared variable
1160 in its own copy-in location, making the
1161 variable no longer really shared. */
1162 if (shared_ctx->is_nested)
1163 {
1164 omp_context *up;
1165
1166 for (up = shared_ctx->outer; up; up = up->outer)
1167 if (is_taskreg_ctx (up) && maybe_lookup_decl (decl, up))
1168 break;
1169
1170 if (up)
1171 {
1172 tree c;
1173
1174 for (c = gimple_omp_taskreg_clauses (up->stmt);
1175 c; c = OMP_CLAUSE_CHAIN (c))
1176 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
1177 && OMP_CLAUSE_DECL (c) == decl)
1178 break;
1179
1180 if (c)
1181 goto maybe_mark_addressable_and_ret;
1182 }
1183 }
1184
1185 /* For tasks, avoid using copy-in/out. As tasks can be
1186 deferred or executed in a different thread, the task hasn't
1187 necessarily terminated when GOMP_task returns. */
1188 if (is_task_ctx (shared_ctx))
1189 {
1190 tree outer;
1191 maybe_mark_addressable_and_ret:
1192 outer = maybe_lookup_decl_in_outer_ctx (decl, shared_ctx);
1193 if (is_gimple_reg (outer) && !omp_member_access_dummy_var (outer))
1194 {
1195 /* Taking address of OUTER in lower_send_shared_vars
1196 might need regimplification of everything that uses the
1197 variable. */
1198 if (!task_shared_vars)
1199 task_shared_vars = BITMAP_ALLOC (NULL);
1200 bitmap_set_bit (task_shared_vars, DECL_UID (outer));
1201 TREE_ADDRESSABLE (outer) = 1;
1202 }
1203 return true;
1204 }
1205 }
1206
1207 return false;
1208 }
1209
1210 /* Construct a new automatic decl similar to VAR. */
1211
1212 static tree
1213 omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx)
1214 {
1215 tree copy = copy_var_decl (var, name, type);
1216
1217 DECL_CONTEXT (copy) = current_function_decl;
1218 DECL_CHAIN (copy) = ctx->block_vars;
1219 /* If VAR is listed in task_shared_vars, it means it wasn't
1220 originally addressable and was only made so because the task
1221 needs to take its address. We don't need to take the address
1222 of privatizations from that var. */
1223 if (TREE_ADDRESSABLE (var)
1224 && task_shared_vars
1225 && bitmap_bit_p (task_shared_vars, DECL_UID (var)))
1226 TREE_ADDRESSABLE (copy) = 0;
1227 ctx->block_vars = copy;
1228
1229 return copy;
1230 }
1231
1232 static tree
1233 omp_copy_decl_1 (tree var, omp_context *ctx)
1234 {
1235 return omp_copy_decl_2 (var, DECL_NAME (var), TREE_TYPE (var), ctx);
1236 }
1237
1238 /* Build COMPONENT_REF and set TREE_THIS_VOLATILE and TREE_READONLY on it
1239 as appropriate. */
1240 static tree
1241 omp_build_component_ref (tree obj, tree field)
1242 {
1243 tree ret = build3 (COMPONENT_REF, TREE_TYPE (field), obj, field, NULL);
1244 if (TREE_THIS_VOLATILE (field))
1245 TREE_THIS_VOLATILE (ret) |= 1;
1246 if (TREE_READONLY (field))
1247 TREE_READONLY (ret) |= 1;
1248 return ret;
1249 }
1250
1251 /* Build tree nodes to access the field for VAR on the receiver side. */
1252
1253 static tree
1254 build_receiver_ref (tree var, bool by_ref, omp_context *ctx)
1255 {
1256 tree x, field = lookup_field (var, ctx);
1257
1258 /* If the receiver record type was remapped in the child function,
1259 remap the field into the new record type. */
1260 x = maybe_lookup_field (field, ctx);
1261 if (x != NULL)
1262 field = x;
1263
1264 x = build_simple_mem_ref (ctx->receiver_decl);
1265 TREE_THIS_NOTRAP (x) = 1;
1266 x = omp_build_component_ref (x, field);
1267 if (by_ref)
1268 {
1269 x = build_simple_mem_ref (x);
1270 TREE_THIS_NOTRAP (x) = 1;
1271 }
1272
1273 return x;
1274 }
1275
1276 /* Build tree nodes to access VAR in the scope outer to CTX. In the case
1277 of a parallel, this is a component reference; for workshare constructs
1278 this is some variable. */
1279
1280 static tree
1281 build_outer_var_ref (tree var, omp_context *ctx, bool lastprivate = false)
1282 {
1283 tree x;
1284
1285 if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)))
1286 x = var;
1287 else if (is_variable_sized (var))
1288 {
1289 x = TREE_OPERAND (DECL_VALUE_EXPR (var), 0);
1290 x = build_outer_var_ref (x, ctx, lastprivate);
1291 x = build_simple_mem_ref (x);
1292 }
1293 else if (is_taskreg_ctx (ctx))
1294 {
1295 bool by_ref = use_pointer_for_field (var, NULL);
1296 x = build_receiver_ref (var, by_ref, ctx);
1297 }
1298 else if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
1299 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
1300 {
1301 /* #pragma omp simd isn't a worksharing construct, and can reference even
1302 private vars in its linear etc. clauses. */
1303 x = NULL_TREE;
1304 if (ctx->outer && is_taskreg_ctx (ctx))
1305 x = lookup_decl (var, ctx->outer);
1306 else if (ctx->outer)
1307 x = maybe_lookup_decl_in_outer_ctx (var, ctx);
1308 if (x == NULL_TREE)
1309 x = var;
1310 }
1311 else if (lastprivate && is_taskloop_ctx (ctx))
1312 {
1313 gcc_assert (ctx->outer);
1314 splay_tree_node n
1315 = splay_tree_lookup (ctx->outer->field_map,
1316 (splay_tree_key) &DECL_UID (var));
1317 if (n == NULL)
1318 {
1319 if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx->outer)))
1320 x = var;
1321 else
1322 x = lookup_decl (var, ctx->outer);
1323 }
1324 else
1325 {
1326 tree field = (tree) n->value;
1327 /* If the receiver record type was remapped in the child function,
1328 remap the field into the new record type. */
1329 x = maybe_lookup_field (field, ctx->outer);
1330 if (x != NULL)
1331 field = x;
1332
1333 x = build_simple_mem_ref (ctx->outer->receiver_decl);
1334 x = omp_build_component_ref (x, field);
1335 if (use_pointer_for_field (var, ctx->outer))
1336 x = build_simple_mem_ref (x);
1337 }
1338 }
1339 else if (ctx->outer)
1340 {
1341 omp_context *outer = ctx->outer;
1342 if (gimple_code (outer->stmt) == GIMPLE_OMP_GRID_BODY)
1343 {
1344 outer = outer->outer;
1345 gcc_assert (outer
1346 && gimple_code (outer->stmt) != GIMPLE_OMP_GRID_BODY);
1347 }
1348 x = lookup_decl (var, outer);
1349 }
1350 else if (is_reference (var))
1351 /* This can happen with orphaned constructs. If var is reference, it is
1352 possible it is shared and as such valid. */
1353 x = var;
1354 else if (omp_member_access_dummy_var (var))
1355 x = var;
1356 else
1357 gcc_unreachable ();
1358
1359 if (x == var)
1360 {
1361 tree t = omp_member_access_dummy_var (var);
1362 if (t)
1363 {
1364 x = DECL_VALUE_EXPR (var);
1365 tree o = maybe_lookup_decl_in_outer_ctx (t, ctx);
1366 if (o != t)
1367 x = unshare_and_remap (x, t, o);
1368 else
1369 x = unshare_expr (x);
1370 }
1371 }
1372
1373 if (is_reference (var))
1374 x = build_simple_mem_ref (x);
1375
1376 return x;
1377 }
1378
1379 /* Build tree nodes to access the field for VAR on the sender side. */
1380
1381 static tree
1382 build_sender_ref (splay_tree_key key, omp_context *ctx)
1383 {
1384 tree field = lookup_sfield (key, ctx);
1385 return omp_build_component_ref (ctx->sender_decl, field);
1386 }
1387
1388 static tree
1389 build_sender_ref (tree var, omp_context *ctx)
1390 {
1391 return build_sender_ref ((splay_tree_key) var, ctx);
1392 }
1393
1394 /* Add a new field for VAR inside the structure CTX->SENDER_DECL. If
1395 BASE_POINTERS_RESTRICT, declare the field with restrict. */
1396
1397 static void
1398 install_var_field (tree var, bool by_ref, int mask, omp_context *ctx,
1399 bool base_pointers_restrict = false)
1400 {
1401 tree field, type, sfield = NULL_TREE;
1402 splay_tree_key key = (splay_tree_key) var;
1403
1404 if ((mask & 8) != 0)
1405 {
1406 key = (splay_tree_key) &DECL_UID (var);
1407 gcc_checking_assert (key != (splay_tree_key) var);
1408 }
1409 gcc_assert ((mask & 1) == 0
1410 || !splay_tree_lookup (ctx->field_map, key));
1411 gcc_assert ((mask & 2) == 0 || !ctx->sfield_map
1412 || !splay_tree_lookup (ctx->sfield_map, key));
1413 gcc_assert ((mask & 3) == 3
1414 || !is_gimple_omp_oacc (ctx->stmt));
1415
1416 type = TREE_TYPE (var);
1417 /* Prevent redeclaring the var in the split-off function with a restrict
1418 pointer type. Note that we only clear type itself; restrict qualifiers in
1419 the pointed-to type will be ignored by points-to analysis. */
1420 if (POINTER_TYPE_P (type)
1421 && TYPE_RESTRICT (type))
1422 type = build_qualified_type (type, TYPE_QUALS (type) & ~TYPE_QUAL_RESTRICT);
1423
1424 if (mask & 4)
1425 {
1426 gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
1427 type = build_pointer_type (build_pointer_type (type));
1428 }
1429 else if (by_ref)
1430 {
1431 type = build_pointer_type (type);
1432 if (base_pointers_restrict)
1433 type = build_qualified_type (type, TYPE_QUAL_RESTRICT);
1434 }
1435 else if ((mask & 3) == 1 && is_reference (var))
1436 type = TREE_TYPE (type);
1437
1438 field = build_decl (DECL_SOURCE_LOCATION (var),
1439 FIELD_DECL, DECL_NAME (var), type);
1440
1441 /* Remember what variable this field was created for. This does have a
1442 side effect of making dwarf2out ignore this member, so for helpful
1443 debugging we clear it later in delete_omp_context. */
1444 DECL_ABSTRACT_ORIGIN (field) = var;
1445 if (type == TREE_TYPE (var))
1446 {
1447 DECL_ALIGN (field) = DECL_ALIGN (var);
1448 DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
1449 TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
1450 }
1451 else
1452 DECL_ALIGN (field) = TYPE_ALIGN (type);
1453
1454 if ((mask & 3) == 3)
1455 {
1456 insert_field_into_struct (ctx->record_type, field);
1457 if (ctx->srecord_type)
1458 {
1459 sfield = build_decl (DECL_SOURCE_LOCATION (var),
1460 FIELD_DECL, DECL_NAME (var), type);
1461 DECL_ABSTRACT_ORIGIN (sfield) = var;
1462 DECL_ALIGN (sfield) = DECL_ALIGN (field);
1463 DECL_USER_ALIGN (sfield) = DECL_USER_ALIGN (field);
1464 TREE_THIS_VOLATILE (sfield) = TREE_THIS_VOLATILE (field);
1465 insert_field_into_struct (ctx->srecord_type, sfield);
1466 }
1467 }
1468 else
1469 {
1470 if (ctx->srecord_type == NULL_TREE)
1471 {
1472 tree t;
1473
1474 ctx->srecord_type = lang_hooks.types.make_type (RECORD_TYPE);
1475 ctx->sfield_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
1476 for (t = TYPE_FIELDS (ctx->record_type); t ; t = TREE_CHAIN (t))
1477 {
1478 sfield = build_decl (DECL_SOURCE_LOCATION (t),
1479 FIELD_DECL, DECL_NAME (t), TREE_TYPE (t));
1480 DECL_ABSTRACT_ORIGIN (sfield) = DECL_ABSTRACT_ORIGIN (t);
1481 insert_field_into_struct (ctx->srecord_type, sfield);
1482 splay_tree_insert (ctx->sfield_map,
1483 (splay_tree_key) DECL_ABSTRACT_ORIGIN (t),
1484 (splay_tree_value) sfield);
1485 }
1486 }
1487 sfield = field;
1488 insert_field_into_struct ((mask & 1) ? ctx->record_type
1489 : ctx->srecord_type, field);
1490 }
1491
1492 if (mask & 1)
1493 splay_tree_insert (ctx->field_map, key, (splay_tree_value) field);
1494 if ((mask & 2) && ctx->sfield_map)
1495 splay_tree_insert (ctx->sfield_map, key, (splay_tree_value) sfield);
1496 }
1497
1498 static tree
1499 install_var_local (tree var, omp_context *ctx)
1500 {
1501 tree new_var = omp_copy_decl_1 (var, ctx);
1502 insert_decl_map (&ctx->cb, var, new_var);
1503 return new_var;
1504 }
1505
1506 /* Adjust the replacement for DECL in CTX for the new context. This means
1507 copying the DECL_VALUE_EXPR, and fixing up the type. */
1508
1509 static void
1510 fixup_remapped_decl (tree decl, omp_context *ctx, bool private_debug)
1511 {
1512 tree new_decl, size;
1513
1514 new_decl = lookup_decl (decl, ctx);
1515
1516 TREE_TYPE (new_decl) = remap_type (TREE_TYPE (decl), &ctx->cb);
1517
1518 if ((!TREE_CONSTANT (DECL_SIZE (new_decl)) || private_debug)
1519 && DECL_HAS_VALUE_EXPR_P (decl))
1520 {
1521 tree ve = DECL_VALUE_EXPR (decl);
1522 walk_tree (&ve, copy_tree_body_r, &ctx->cb, NULL);
1523 SET_DECL_VALUE_EXPR (new_decl, ve);
1524 DECL_HAS_VALUE_EXPR_P (new_decl) = 1;
1525 }
1526
1527 if (!TREE_CONSTANT (DECL_SIZE (new_decl)))
1528 {
1529 size = remap_decl (DECL_SIZE (decl), &ctx->cb);
1530 if (size == error_mark_node)
1531 size = TYPE_SIZE (TREE_TYPE (new_decl));
1532 DECL_SIZE (new_decl) = size;
1533
1534 size = remap_decl (DECL_SIZE_UNIT (decl), &ctx->cb);
1535 if (size == error_mark_node)
1536 size = TYPE_SIZE_UNIT (TREE_TYPE (new_decl));
1537 DECL_SIZE_UNIT (new_decl) = size;
1538 }
1539 }
1540
1541 /* The callback for remap_decl. Search all containing contexts for a
1542 mapping of the variable; this avoids having to duplicate the splay
1543 tree ahead of time. We know a mapping doesn't already exist in the
1544 given context. Create new mappings to implement default semantics. */
1545
1546 static tree
1547 omp_copy_decl (tree var, copy_body_data *cb)
1548 {
1549 omp_context *ctx = (omp_context *) cb;
1550 tree new_var;
1551
1552 if (TREE_CODE (var) == LABEL_DECL)
1553 {
1554 new_var = create_artificial_label (DECL_SOURCE_LOCATION (var));
1555 DECL_CONTEXT (new_var) = current_function_decl;
1556 insert_decl_map (&ctx->cb, var, new_var);
1557 return new_var;
1558 }
1559
1560 while (!is_taskreg_ctx (ctx))
1561 {
1562 ctx = ctx->outer;
1563 if (ctx == NULL)
1564 return var;
1565 new_var = maybe_lookup_decl (var, ctx);
1566 if (new_var)
1567 return new_var;
1568 }
1569
1570 if (is_global_var (var) || decl_function_context (var) != ctx->cb.src_fn)
1571 return var;
1572
1573 return error_mark_node;
1574 }
1575
1576
1577 /* Debugging dumps for parallel regions. */
1578 void dump_omp_region (FILE *, struct omp_region *, int);
1579 void debug_omp_region (struct omp_region *);
1580 void debug_all_omp_regions (void);
1581
1582 /* Dump the parallel region tree rooted at REGION. */
1583
1584 void
1585 dump_omp_region (FILE *file, struct omp_region *region, int indent)
1586 {
1587 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
1588 gimple_code_name[region->type]);
1589
1590 if (region->inner)
1591 dump_omp_region (file, region->inner, indent + 4);
1592
1593 if (region->cont)
1594 {
1595 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
1596 region->cont->index);
1597 }
1598
1599 if (region->exit)
1600 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
1601 region->exit->index);
1602 else
1603 fprintf (file, "%*s[no exit marker]\n", indent, "");
1604
1605 if (region->next)
1606 dump_omp_region (file, region->next, indent);
1607 }
1608
1609 DEBUG_FUNCTION void
1610 debug_omp_region (struct omp_region *region)
1611 {
1612 dump_omp_region (stderr, region, 0);
1613 }
1614
1615 DEBUG_FUNCTION void
1616 debug_all_omp_regions (void)
1617 {
1618 dump_omp_region (stderr, root_omp_region, 0);
1619 }
1620
1621
1622 /* Create a new parallel region starting at STMT inside region PARENT. */
1623
1624 static struct omp_region *
1625 new_omp_region (basic_block bb, enum gimple_code type,
1626 struct omp_region *parent)
1627 {
1628 struct omp_region *region = XCNEW (struct omp_region);
1629
1630 region->outer = parent;
1631 region->entry = bb;
1632 region->type = type;
1633
1634 if (parent)
1635 {
1636 /* This is a nested region. Add it to the list of inner
1637 regions in PARENT. */
1638 region->next = parent->inner;
1639 parent->inner = region;
1640 }
1641 else
1642 {
1643 /* This is a toplevel region. Add it to the list of toplevel
1644 regions in ROOT_OMP_REGION. */
1645 region->next = root_omp_region;
1646 root_omp_region = region;
1647 }
1648
1649 return region;
1650 }
1651
1652 /* Release the memory associated with the region tree rooted at REGION. */
1653
1654 static void
1655 free_omp_region_1 (struct omp_region *region)
1656 {
1657 struct omp_region *i, *n;
1658
1659 for (i = region->inner; i ; i = n)
1660 {
1661 n = i->next;
1662 free_omp_region_1 (i);
1663 }
1664
1665 free (region);
1666 }
1667
1668 /* Release the memory for the entire omp region tree. */
1669
1670 void
1671 free_omp_regions (void)
1672 {
1673 struct omp_region *r, *n;
1674 for (r = root_omp_region; r ; r = n)
1675 {
1676 n = r->next;
1677 free_omp_region_1 (r);
1678 }
1679 root_omp_region = NULL;
1680 }
1681
1682
1683 /* Create a new context, with OUTER_CTX being the surrounding context. */
1684
1685 static omp_context *
1686 new_omp_context (gimple *stmt, omp_context *outer_ctx)
1687 {
1688 omp_context *ctx = XCNEW (omp_context);
1689
1690 splay_tree_insert (all_contexts, (splay_tree_key) stmt,
1691 (splay_tree_value) ctx);
1692 ctx->stmt = stmt;
1693
1694 if (outer_ctx)
1695 {
1696 ctx->outer = outer_ctx;
1697 ctx->cb = outer_ctx->cb;
1698 ctx->cb.block = NULL;
1699 ctx->depth = outer_ctx->depth + 1;
1700 }
1701 else
1702 {
1703 ctx->cb.src_fn = current_function_decl;
1704 ctx->cb.dst_fn = current_function_decl;
1705 ctx->cb.src_node = cgraph_node::get (current_function_decl);
1706 gcc_checking_assert (ctx->cb.src_node);
1707 ctx->cb.dst_node = ctx->cb.src_node;
1708 ctx->cb.src_cfun = cfun;
1709 ctx->cb.copy_decl = omp_copy_decl;
1710 ctx->cb.eh_lp_nr = 0;
1711 ctx->cb.transform_call_graph_edges = CB_CGE_MOVE;
1712 ctx->depth = 1;
1713 }
1714
1715 ctx->cb.decl_map = new hash_map<tree, tree>;
1716
1717 return ctx;
1718 }
1719
1720 static gimple_seq maybe_catch_exception (gimple_seq);
1721
1722 /* Finalize task copyfn. */
1723
1724 static void
1725 finalize_task_copyfn (gomp_task *task_stmt)
1726 {
1727 struct function *child_cfun;
1728 tree child_fn;
1729 gimple_seq seq = NULL, new_seq;
1730 gbind *bind;
1731
1732 child_fn = gimple_omp_task_copy_fn (task_stmt);
1733 if (child_fn == NULL_TREE)
1734 return;
1735
1736 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1737 DECL_STRUCT_FUNCTION (child_fn)->curr_properties = cfun->curr_properties;
1738
1739 push_cfun (child_cfun);
1740 bind = gimplify_body (child_fn, false);
1741 gimple_seq_add_stmt (&seq, bind);
1742 new_seq = maybe_catch_exception (seq);
1743 if (new_seq != seq)
1744 {
1745 bind = gimple_build_bind (NULL, new_seq, NULL);
1746 seq = NULL;
1747 gimple_seq_add_stmt (&seq, bind);
1748 }
1749 gimple_set_body (child_fn, seq);
1750 pop_cfun ();
1751
1752 /* Inform the callgraph about the new function. */
1753 cgraph_node *node = cgraph_node::get_create (child_fn);
1754 node->parallelized_function = 1;
1755 cgraph_node::add_new_function (child_fn, false);
1756 }
1757
1758 /* Destroy an omp_context data structure. Called through the splay tree
1759 value delete callback. */
1760
1761 static void
1762 delete_omp_context (splay_tree_value value)
1763 {
1764 omp_context *ctx = (omp_context *) value;
1765
1766 delete ctx->cb.decl_map;
1767
1768 if (ctx->field_map)
1769 splay_tree_delete (ctx->field_map);
1770 if (ctx->sfield_map)
1771 splay_tree_delete (ctx->sfield_map);
1772
1773 /* We hijacked DECL_ABSTRACT_ORIGIN earlier. We need to clear it now,
1774 as otherwise it would produce corrupt debug information. */
1775 if (ctx->record_type)
1776 {
1777 tree t;
1778 for (t = TYPE_FIELDS (ctx->record_type); t ; t = DECL_CHAIN (t))
1779 DECL_ABSTRACT_ORIGIN (t) = NULL;
1780 }
1781 if (ctx->srecord_type)
1782 {
1783 tree t;
1784 for (t = TYPE_FIELDS (ctx->srecord_type); t ; t = DECL_CHAIN (t))
1785 DECL_ABSTRACT_ORIGIN (t) = NULL;
1786 }
1787
1788 if (is_task_ctx (ctx))
1789 finalize_task_copyfn (as_a <gomp_task *> (ctx->stmt));
1790
1791 XDELETE (ctx);
1792 }
1793
1794 /* Fix up RECEIVER_DECL with a type that has been remapped to the child
1795 context. */
1796
1797 static void
1798 fixup_child_record_type (omp_context *ctx)
1799 {
1800 tree f, type = ctx->record_type;
1801
1802 if (!ctx->receiver_decl)
1803 return;
1804 /* ??? It isn't sufficient to just call remap_type here, because
1805 variably_modified_type_p doesn't work the way we expect for
1806 record types. Testing each field for whether it needs remapping
1807 and creating a new record by hand works, however. */
1808 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
1809 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
1810 break;
1811 if (f)
1812 {
1813 tree name, new_fields = NULL;
1814
1815 type = lang_hooks.types.make_type (RECORD_TYPE);
1816 name = DECL_NAME (TYPE_NAME (ctx->record_type));
1817 name = build_decl (DECL_SOURCE_LOCATION (ctx->receiver_decl),
1818 TYPE_DECL, name, type);
1819 TYPE_NAME (type) = name;
1820
1821 for (f = TYPE_FIELDS (ctx->record_type); f ; f = DECL_CHAIN (f))
1822 {
1823 tree new_f = copy_node (f);
1824 DECL_CONTEXT (new_f) = type;
1825 TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &ctx->cb);
1826 DECL_CHAIN (new_f) = new_fields;
1827 walk_tree (&DECL_SIZE (new_f), copy_tree_body_r, &ctx->cb, NULL);
1828 walk_tree (&DECL_SIZE_UNIT (new_f), copy_tree_body_r,
1829 &ctx->cb, NULL);
1830 walk_tree (&DECL_FIELD_OFFSET (new_f), copy_tree_body_r,
1831 &ctx->cb, NULL);
1832 new_fields = new_f;
1833
1834 /* Arrange to be able to look up the receiver field
1835 given the sender field. */
1836 splay_tree_insert (ctx->field_map, (splay_tree_key) f,
1837 (splay_tree_value) new_f);
1838 }
1839 TYPE_FIELDS (type) = nreverse (new_fields);
1840 layout_type (type);
1841 }
1842
1843 /* In a target region we never modify any of the pointers in *.omp_data_i,
1844 so attempt to help the optimizers. */
1845 if (is_gimple_omp_offloaded (ctx->stmt))
1846 type = build_qualified_type (type, TYPE_QUAL_CONST);
1847
1848 TREE_TYPE (ctx->receiver_decl)
1849 = build_qualified_type (build_reference_type (type), TYPE_QUAL_RESTRICT);
1850 }
1851
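/* Illustrative sketch (names and layout are hypothetical, not taken from a
   particular test case): for a sender record

     struct .omp_data_s { int *x; int y; };

   the receiver in the child function ends up declared roughly as

     struct .omp_data_s * __restrict .omp_data_i;

   and, for offloaded regions, as a restrict reference to the const-qualified
   record, reflecting that the pointers in *.omp_data_i are never modified
   there.  Variably modified field types are remapped into a freshly built
   record as done above.  */
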
1852 /* Instantiate decls as necessary in CTX to satisfy the data sharing
1853 specified by CLAUSES. If BASE_POINTERS_RESTRICT, install the var fields
1854 with restrict-qualified pointer types. */
1855
1856 static void
1857 scan_sharing_clauses (tree clauses, omp_context *ctx,
1858 bool base_pointers_restrict = false)
1859 {
1860 tree c, decl;
1861 bool scan_array_reductions = false;
1862
1863 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
1864 {
1865 bool by_ref;
1866
1867 switch (OMP_CLAUSE_CODE (c))
1868 {
1869 case OMP_CLAUSE_PRIVATE:
1870 decl = OMP_CLAUSE_DECL (c);
1871 if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
1872 goto do_private;
1873 else if (!is_variable_sized (decl))
1874 install_var_local (decl, ctx);
1875 break;
1876
1877 case OMP_CLAUSE_SHARED:
1878 decl = OMP_CLAUSE_DECL (c);
1879 /* Ignore shared directives in teams construct. */
1880 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
1881 {
1882 /* Global variables don't need to be copied,
1883 the receiver side will use them directly. */
1884 tree odecl = maybe_lookup_decl_in_outer_ctx (decl, ctx);
1885 if (is_global_var (odecl))
1886 break;
1887 insert_decl_map (&ctx->cb, decl, odecl);
1888 break;
1889 }
1890 gcc_assert (is_taskreg_ctx (ctx));
1891 gcc_assert (!COMPLETE_TYPE_P (TREE_TYPE (decl))
1892 || !is_variable_sized (decl));
1893 /* Global variables don't need to be copied,
1894 the receiver side will use them directly. */
1895 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
1896 break;
1897 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
1898 {
1899 use_pointer_for_field (decl, ctx);
1900 break;
1901 }
1902 by_ref = use_pointer_for_field (decl, NULL);
1903 if ((! TREE_READONLY (decl) && !OMP_CLAUSE_SHARED_READONLY (c))
1904 || TREE_ADDRESSABLE (decl)
1905 || by_ref
1906 || is_reference (decl))
1907 {
1908 by_ref = use_pointer_for_field (decl, ctx);
1909 install_var_field (decl, by_ref, 3, ctx);
1910 install_var_local (decl, ctx);
1911 break;
1912 }
1913 /* We don't need to copy const scalar vars back. */
1914 OMP_CLAUSE_SET_CODE (c, OMP_CLAUSE_FIRSTPRIVATE);
1915 goto do_private;
1916
1917 case OMP_CLAUSE_REDUCTION:
1918 decl = OMP_CLAUSE_DECL (c);
1919 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
1920 && TREE_CODE (decl) == MEM_REF)
1921 {
1922 tree t = TREE_OPERAND (decl, 0);
1923 if (TREE_CODE (t) == POINTER_PLUS_EXPR)
1924 t = TREE_OPERAND (t, 0);
1925 if (TREE_CODE (t) == INDIRECT_REF
1926 || TREE_CODE (t) == ADDR_EXPR)
1927 t = TREE_OPERAND (t, 0);
1928 install_var_local (t, ctx);
1929 if (is_taskreg_ctx (ctx)
1930 && !is_global_var (maybe_lookup_decl_in_outer_ctx (t, ctx))
1931 && !is_variable_sized (t))
1932 {
1933 by_ref = use_pointer_for_field (t, ctx);
1934 install_var_field (t, by_ref, 3, ctx);
1935 }
1936 break;
1937 }
1938 goto do_private;
1939
1940 case OMP_CLAUSE_LASTPRIVATE:
1941 /* Let the corresponding firstprivate clause create
1942 the variable. */
1943 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
1944 break;
1945 /* FALLTHRU */
1946
1947 case OMP_CLAUSE_FIRSTPRIVATE:
1948 case OMP_CLAUSE_LINEAR:
1949 decl = OMP_CLAUSE_DECL (c);
1950 do_private:
1951 if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
1952 || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IS_DEVICE_PTR)
1953 && is_gimple_omp_offloaded (ctx->stmt))
1954 {
1955 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
1956 install_var_field (decl, !is_reference (decl), 3, ctx);
1957 else if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
1958 install_var_field (decl, true, 3, ctx);
1959 else
1960 install_var_field (decl, false, 3, ctx);
1961 }
1962 if (is_variable_sized (decl))
1963 {
1964 if (is_task_ctx (ctx))
1965 install_var_field (decl, false, 1, ctx);
1966 break;
1967 }
1968 else if (is_taskreg_ctx (ctx))
1969 {
1970 bool global
1971 = is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx));
1972 by_ref = use_pointer_for_field (decl, NULL);
1973
1974 if (is_task_ctx (ctx)
1975 && (global || by_ref || is_reference (decl)))
1976 {
1977 install_var_field (decl, false, 1, ctx);
1978 if (!global)
1979 install_var_field (decl, by_ref, 2, ctx);
1980 }
1981 else if (!global)
1982 install_var_field (decl, by_ref, 3, ctx);
1983 }
1984 install_var_local (decl, ctx);
1985 break;
1986
1987 case OMP_CLAUSE_USE_DEVICE_PTR:
1988 decl = OMP_CLAUSE_DECL (c);
1989 if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
1990 install_var_field (decl, true, 3, ctx);
1991 else
1992 install_var_field (decl, false, 3, ctx);
1993 if (DECL_SIZE (decl)
1994 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
1995 {
1996 tree decl2 = DECL_VALUE_EXPR (decl);
1997 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
1998 decl2 = TREE_OPERAND (decl2, 0);
1999 gcc_assert (DECL_P (decl2));
2000 install_var_local (decl2, ctx);
2001 }
2002 install_var_local (decl, ctx);
2003 break;
2004
2005 case OMP_CLAUSE_IS_DEVICE_PTR:
2006 decl = OMP_CLAUSE_DECL (c);
2007 goto do_private;
2008
2009 case OMP_CLAUSE__LOOPTEMP_:
2010 gcc_assert (is_taskreg_ctx (ctx));
2011 decl = OMP_CLAUSE_DECL (c);
2012 install_var_field (decl, false, 3, ctx);
2013 install_var_local (decl, ctx);
2014 break;
2015
2016 case OMP_CLAUSE_COPYPRIVATE:
2017 case OMP_CLAUSE_COPYIN:
2018 decl = OMP_CLAUSE_DECL (c);
2019 by_ref = use_pointer_for_field (decl, NULL);
2020 install_var_field (decl, by_ref, 3, ctx);
2021 break;
2022
2023 case OMP_CLAUSE_DEFAULT:
2024 ctx->default_kind = OMP_CLAUSE_DEFAULT_KIND (c);
2025 break;
2026
2027 case OMP_CLAUSE_FINAL:
2028 case OMP_CLAUSE_IF:
2029 case OMP_CLAUSE_NUM_THREADS:
2030 case OMP_CLAUSE_NUM_TEAMS:
2031 case OMP_CLAUSE_THREAD_LIMIT:
2032 case OMP_CLAUSE_DEVICE:
2033 case OMP_CLAUSE_SCHEDULE:
2034 case OMP_CLAUSE_DIST_SCHEDULE:
2035 case OMP_CLAUSE_DEPEND:
2036 case OMP_CLAUSE_PRIORITY:
2037 case OMP_CLAUSE_GRAINSIZE:
2038 case OMP_CLAUSE_NUM_TASKS:
2039 case OMP_CLAUSE__CILK_FOR_COUNT_:
2040 case OMP_CLAUSE_NUM_GANGS:
2041 case OMP_CLAUSE_NUM_WORKERS:
2042 case OMP_CLAUSE_VECTOR_LENGTH:
2043 if (ctx->outer)
2044 scan_omp_op (&OMP_CLAUSE_OPERAND (c, 0), ctx->outer);
2045 break;
2046
2047 case OMP_CLAUSE_TO:
2048 case OMP_CLAUSE_FROM:
2049 case OMP_CLAUSE_MAP:
2050 if (ctx->outer)
2051 scan_omp_op (&OMP_CLAUSE_SIZE (c), ctx->outer);
2052 decl = OMP_CLAUSE_DECL (c);
2053 /* Global variables with "omp declare target" attribute
2054 don't need to be copied, the receiver side will use them
2055 directly. However, global variables with "omp declare target link"
2056 attribute need to be copied. */
2057 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2058 && DECL_P (decl)
2059 && ((OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER
2060 && (OMP_CLAUSE_MAP_KIND (c)
2061 != GOMP_MAP_FIRSTPRIVATE_REFERENCE))
2062 || TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2063 && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))
2064 && varpool_node::get_create (decl)->offloadable
2065 && !lookup_attribute ("omp declare target link",
2066 DECL_ATTRIBUTES (decl)))
2067 break;
2068 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2069 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER)
2070 {
2071 /* Ignore GOMP_MAP_POINTER kind for arrays in regions that are
2072 not offloaded; there is nothing to map for those. */
2073 if (!is_gimple_omp_offloaded (ctx->stmt)
2074 && !POINTER_TYPE_P (TREE_TYPE (decl))
2075 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c))
2076 break;
2077 }
2078 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2079 && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
2080 || (OMP_CLAUSE_MAP_KIND (c)
2081 == GOMP_MAP_FIRSTPRIVATE_REFERENCE)))
2082 {
2083 if (TREE_CODE (decl) == COMPONENT_REF
2084 || (TREE_CODE (decl) == INDIRECT_REF
2085 && TREE_CODE (TREE_OPERAND (decl, 0)) == COMPONENT_REF
2086 && (TREE_CODE (TREE_TYPE (TREE_OPERAND (decl, 0)))
2087 == REFERENCE_TYPE)))
2088 break;
2089 if (DECL_SIZE (decl)
2090 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
2091 {
2092 tree decl2 = DECL_VALUE_EXPR (decl);
2093 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2094 decl2 = TREE_OPERAND (decl2, 0);
2095 gcc_assert (DECL_P (decl2));
2096 install_var_local (decl2, ctx);
2097 }
2098 install_var_local (decl, ctx);
2099 break;
2100 }
2101 if (DECL_P (decl))
2102 {
2103 if (DECL_SIZE (decl)
2104 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
2105 {
2106 tree decl2 = DECL_VALUE_EXPR (decl);
2107 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2108 decl2 = TREE_OPERAND (decl2, 0);
2109 gcc_assert (DECL_P (decl2));
2110 install_var_field (decl2, true, 3, ctx);
2111 install_var_local (decl2, ctx);
2112 install_var_local (decl, ctx);
2113 }
2114 else
2115 {
2116 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
2117 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
2118 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
2119 && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2120 install_var_field (decl, true, 7, ctx);
2121 else
2122 install_var_field (decl, true, 3, ctx,
2123 base_pointers_restrict);
2124 if (is_gimple_omp_offloaded (ctx->stmt))
2125 install_var_local (decl, ctx);
2126 }
2127 }
2128 else
2129 {
2130 tree base = get_base_address (decl);
2131 tree nc = OMP_CLAUSE_CHAIN (c);
2132 if (DECL_P (base)
2133 && nc != NULL_TREE
2134 && OMP_CLAUSE_CODE (nc) == OMP_CLAUSE_MAP
2135 && OMP_CLAUSE_DECL (nc) == base
2136 && OMP_CLAUSE_MAP_KIND (nc) == GOMP_MAP_POINTER
2137 && integer_zerop (OMP_CLAUSE_SIZE (nc)))
2138 {
2139 OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c) = 1;
2140 OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (nc) = 1;
2141 }
2142 else
2143 {
2144 if (ctx->outer)
2145 {
2146 scan_omp_op (&OMP_CLAUSE_DECL (c), ctx->outer);
2147 decl = OMP_CLAUSE_DECL (c);
2148 }
2149 gcc_assert (!splay_tree_lookup (ctx->field_map,
2150 (splay_tree_key) decl));
2151 tree field
2152 = build_decl (OMP_CLAUSE_LOCATION (c),
2153 FIELD_DECL, NULL_TREE, ptr_type_node);
2154 DECL_ALIGN (field) = TYPE_ALIGN (ptr_type_node);
2155 insert_field_into_struct (ctx->record_type, field);
2156 splay_tree_insert (ctx->field_map, (splay_tree_key) decl,
2157 (splay_tree_value) field);
2158 }
2159 }
2160 break;
2161
2162 case OMP_CLAUSE__GRIDDIM_:
2163 if (ctx->outer)
2164 {
2165 scan_omp_op (&OMP_CLAUSE__GRIDDIM__SIZE (c), ctx->outer);
2166 scan_omp_op (&OMP_CLAUSE__GRIDDIM__GROUP (c), ctx->outer);
2167 }
2168 break;
2169
2170 case OMP_CLAUSE_NOWAIT:
2171 case OMP_CLAUSE_ORDERED:
2172 case OMP_CLAUSE_COLLAPSE:
2173 case OMP_CLAUSE_UNTIED:
2174 case OMP_CLAUSE_MERGEABLE:
2175 case OMP_CLAUSE_PROC_BIND:
2176 case OMP_CLAUSE_SAFELEN:
2177 case OMP_CLAUSE_SIMDLEN:
2178 case OMP_CLAUSE_THREADS:
2179 case OMP_CLAUSE_SIMD:
2180 case OMP_CLAUSE_NOGROUP:
2181 case OMP_CLAUSE_DEFAULTMAP:
2182 case OMP_CLAUSE_ASYNC:
2183 case OMP_CLAUSE_WAIT:
2184 case OMP_CLAUSE_GANG:
2185 case OMP_CLAUSE_WORKER:
2186 case OMP_CLAUSE_VECTOR:
2187 case OMP_CLAUSE_TILE:
2188 case OMP_CLAUSE_INDEPENDENT:
2189 case OMP_CLAUSE_AUTO:
2190 case OMP_CLAUSE_SEQ:
2191 break;
2192
2193 case OMP_CLAUSE_ALIGNED:
2194 decl = OMP_CLAUSE_DECL (c);
2195 if (is_global_var (decl)
2196 && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2197 install_var_local (decl, ctx);
2198 break;
2199
2200 case OMP_CLAUSE_DEVICE_RESIDENT:
2201 case OMP_CLAUSE__CACHE_:
2202 sorry ("clause not supported yet");
2203 break;
2204
2205 default:
2206 gcc_unreachable ();
2207 }
2208 }
2209
2210 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
2211 {
2212 switch (OMP_CLAUSE_CODE (c))
2213 {
2214 case OMP_CLAUSE_LASTPRIVATE:
2215 /* Let the corresponding firstprivate clause create
2216 the variable. */
2217 if (OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
2218 scan_array_reductions = true;
2219 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
2220 break;
2221 /* FALLTHRU */
2222
2223 case OMP_CLAUSE_FIRSTPRIVATE:
2224 case OMP_CLAUSE_PRIVATE:
2225 case OMP_CLAUSE_LINEAR:
2226 case OMP_CLAUSE_IS_DEVICE_PTR:
2227 decl = OMP_CLAUSE_DECL (c);
2228 if (is_variable_sized (decl))
2229 {
2230 if ((OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE
2231 || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IS_DEVICE_PTR)
2232 && is_gimple_omp_offloaded (ctx->stmt))
2233 {
2234 tree decl2 = DECL_VALUE_EXPR (decl);
2235 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2236 decl2 = TREE_OPERAND (decl2, 0);
2237 gcc_assert (DECL_P (decl2));
2238 install_var_local (decl2, ctx);
2239 fixup_remapped_decl (decl2, ctx, false);
2240 }
2241 install_var_local (decl, ctx);
2242 }
2243 fixup_remapped_decl (decl, ctx,
2244 OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE
2245 && OMP_CLAUSE_PRIVATE_DEBUG (c));
2246 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2247 && OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c))
2248 scan_array_reductions = true;
2249 break;
2250
2251 case OMP_CLAUSE_REDUCTION:
2252 decl = OMP_CLAUSE_DECL (c);
2253 if (TREE_CODE (decl) != MEM_REF)
2254 {
2255 if (is_variable_sized (decl))
2256 install_var_local (decl, ctx);
2257 fixup_remapped_decl (decl, ctx, false);
2258 }
2259 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
2260 scan_array_reductions = true;
2261 break;
2262
2263 case OMP_CLAUSE_SHARED:
2264 /* Ignore shared directives in teams construct. */
2265 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
2266 break;
2267 decl = OMP_CLAUSE_DECL (c);
2268 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
2269 break;
2270 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
2271 {
2272 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl,
2273 ctx->outer)))
2274 break;
2275 bool by_ref = use_pointer_for_field (decl, ctx);
2276 install_var_field (decl, by_ref, 11, ctx);
2277 break;
2278 }
2279 fixup_remapped_decl (decl, ctx, false);
2280 break;
2281
2282 case OMP_CLAUSE_MAP:
2283 if (!is_gimple_omp_offloaded (ctx->stmt))
2284 break;
2285 decl = OMP_CLAUSE_DECL (c);
2286 if (DECL_P (decl)
2287 && ((OMP_CLAUSE_MAP_KIND (c) != GOMP_MAP_FIRSTPRIVATE_POINTER
2288 && (OMP_CLAUSE_MAP_KIND (c)
2289 != GOMP_MAP_FIRSTPRIVATE_REFERENCE))
2290 || TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE)
2291 && is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx))
2292 && varpool_node::get_create (decl)->offloadable)
2293 break;
2294 if (DECL_P (decl))
2295 {
2296 if ((OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
2297 || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER)
2298 && TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE
2299 && !COMPLETE_TYPE_P (TREE_TYPE (decl)))
2300 {
2301 tree new_decl = lookup_decl (decl, ctx);
2302 TREE_TYPE (new_decl)
2303 = remap_type (TREE_TYPE (decl), &ctx->cb);
2304 }
2305 else if (DECL_SIZE (decl)
2306 && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
2307 {
2308 tree decl2 = DECL_VALUE_EXPR (decl);
2309 gcc_assert (TREE_CODE (decl2) == INDIRECT_REF);
2310 decl2 = TREE_OPERAND (decl2, 0);
2311 gcc_assert (DECL_P (decl2));
2312 fixup_remapped_decl (decl2, ctx, false);
2313 fixup_remapped_decl (decl, ctx, true);
2314 }
2315 else
2316 fixup_remapped_decl (decl, ctx, false);
2317 }
2318 break;
2319
2320 case OMP_CLAUSE_COPYPRIVATE:
2321 case OMP_CLAUSE_COPYIN:
2322 case OMP_CLAUSE_DEFAULT:
2323 case OMP_CLAUSE_IF:
2324 case OMP_CLAUSE_NUM_THREADS:
2325 case OMP_CLAUSE_NUM_TEAMS:
2326 case OMP_CLAUSE_THREAD_LIMIT:
2327 case OMP_CLAUSE_DEVICE:
2328 case OMP_CLAUSE_SCHEDULE:
2329 case OMP_CLAUSE_DIST_SCHEDULE:
2330 case OMP_CLAUSE_NOWAIT:
2331 case OMP_CLAUSE_ORDERED:
2332 case OMP_CLAUSE_COLLAPSE:
2333 case OMP_CLAUSE_UNTIED:
2334 case OMP_CLAUSE_FINAL:
2335 case OMP_CLAUSE_MERGEABLE:
2336 case OMP_CLAUSE_PROC_BIND:
2337 case OMP_CLAUSE_SAFELEN:
2338 case OMP_CLAUSE_SIMDLEN:
2339 case OMP_CLAUSE_ALIGNED:
2340 case OMP_CLAUSE_DEPEND:
2341 case OMP_CLAUSE__LOOPTEMP_:
2342 case OMP_CLAUSE_TO:
2343 case OMP_CLAUSE_FROM:
2344 case OMP_CLAUSE_PRIORITY:
2345 case OMP_CLAUSE_GRAINSIZE:
2346 case OMP_CLAUSE_NUM_TASKS:
2347 case OMP_CLAUSE_THREADS:
2348 case OMP_CLAUSE_SIMD:
2349 case OMP_CLAUSE_NOGROUP:
2350 case OMP_CLAUSE_DEFAULTMAP:
2351 case OMP_CLAUSE_USE_DEVICE_PTR:
2352 case OMP_CLAUSE__CILK_FOR_COUNT_:
2353 case OMP_CLAUSE_ASYNC:
2354 case OMP_CLAUSE_WAIT:
2355 case OMP_CLAUSE_NUM_GANGS:
2356 case OMP_CLAUSE_NUM_WORKERS:
2357 case OMP_CLAUSE_VECTOR_LENGTH:
2358 case OMP_CLAUSE_GANG:
2359 case OMP_CLAUSE_WORKER:
2360 case OMP_CLAUSE_VECTOR:
2361 case OMP_CLAUSE_TILE:
2362 case OMP_CLAUSE_INDEPENDENT:
2363 case OMP_CLAUSE_AUTO:
2364 case OMP_CLAUSE_SEQ:
2365 case OMP_CLAUSE__GRIDDIM_:
2366 break;
2367
2368 case OMP_CLAUSE_DEVICE_RESIDENT:
2369 case OMP_CLAUSE__CACHE_:
2370 sorry ("clause not supported yet");
2371 break;
2372
2373 default:
2374 gcc_unreachable ();
2375 }
2376 }
2377
2378 gcc_checking_assert (!scan_array_reductions
2379 || !is_gimple_omp_oacc (ctx->stmt));
2380 if (scan_array_reductions)
2381 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
2382 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
2383 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
2384 {
2385 scan_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c), ctx);
2386 scan_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
2387 }
2388 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
2389 && OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
2390 scan_omp (&OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
2391 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2392 && OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c))
2393 scan_omp (&OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c), ctx);
2394 }
2395
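/* A hedged example of what the first scanning loop above typically sets up
   (the exact fields depend on use_pointer_for_field and the construct; this
   is only a sketch):

     int x, y;
     #pragma omp parallel shared(x) firstprivate(y)
       ...

   A shared scalar that must be passed by reference gets a pointer-typed
   field via install_var_field, the firstprivate scalar gets a by-value
   field, and both receive local copies in the child context through
   install_var_local, yielding something along the lines of

     struct .omp_data_s { int *x; int y; };  */
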
2396 /* Create a new name for the omp child function. Returns an identifier. If
2397 IS_CILK_FOR is true then the suffix for the child function is
2398 "_cilk_for_fn". */
2399
2400 static tree
2401 create_omp_child_function_name (bool task_copy, bool is_cilk_for)
2402 {
2403 if (is_cilk_for)
2404 return clone_function_name (current_function_decl, "_cilk_for_fn");
2405 return clone_function_name (current_function_decl,
2406 task_copy ? "_omp_cpyfn" : "_omp_fn");
2407 }
2408
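/* For illustration only: since the name is derived from the current
   function by clone_function_name, outlining regions of foo () typically
   produces identifiers such as foo._omp_fn.0, foo._omp_cpyfn.1 or
   foo._cilk_for_fn.2; the trailing counters are assigned by
   clone_function_name and the particular numbers shown here are just
   examples.  */
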
2409 /* Return the type of the induction variable for the child function of a
2410 _Cilk_for, and of the __high and __low variables, based on TYPE. */
2411
2412 static tree
2413 cilk_for_check_loop_diff_type (tree type)
2414 {
2415 if (TYPE_PRECISION (type) <= TYPE_PRECISION (uint32_type_node))
2416 {
2417 if (TYPE_UNSIGNED (type))
2418 return uint32_type_node;
2419 else
2420 return integer_type_node;
2421 }
2422 else
2423 {
2424 if (TYPE_UNSIGNED (type))
2425 return uint64_type_node;
2426 else
2427 return long_long_integer_type_node;
2428 }
2429 }
2430
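/* Sketch of the mapping implemented above, assuming the usual precisions of
   the standard type nodes: counters no wider than 32 bits map to
   integer_type_node or uint32_type_node depending on signedness, anything
   wider maps to long_long_integer_type_node or uint64_type_node.  E.g. a
   signed short induction variable would use int for __low/__high, while an
   unsigned long long one would use uint64_type_node.  */
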
2431 /* Build a decl for the omp child function. It will not contain a body
2432 yet, just the bare decl. */
2433
2434 static void
2435 create_omp_child_function (omp_context *ctx, bool task_copy)
2436 {
2437 tree decl, type, name, t;
2438
2439 tree cilk_for_count
2440 = (flag_cilkplus && gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL)
2441 ? find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
2442 OMP_CLAUSE__CILK_FOR_COUNT_) : NULL_TREE;
2443 tree cilk_var_type = NULL_TREE;
2444
2445 name = create_omp_child_function_name (task_copy,
2446 cilk_for_count != NULL_TREE);
2447 if (task_copy)
2448 type = build_function_type_list (void_type_node, ptr_type_node,
2449 ptr_type_node, NULL_TREE);
2450 else if (cilk_for_count)
2451 {
2452 type = TREE_TYPE (OMP_CLAUSE_OPERAND (cilk_for_count, 0));
2453 cilk_var_type = cilk_for_check_loop_diff_type (type);
2454 type = build_function_type_list (void_type_node, ptr_type_node,
2455 cilk_var_type, cilk_var_type, NULL_TREE);
2456 }
2457 else
2458 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
2459
2460 decl = build_decl (gimple_location (ctx->stmt), FUNCTION_DECL, name, type);
2461
2462 gcc_checking_assert (!is_gimple_omp_oacc (ctx->stmt)
2463 || !task_copy);
2464 if (!task_copy)
2465 ctx->cb.dst_fn = decl;
2466 else
2467 gimple_omp_task_set_copy_fn (ctx->stmt, decl);
2468
2469 TREE_STATIC (decl) = 1;
2470 TREE_USED (decl) = 1;
2471 DECL_ARTIFICIAL (decl) = 1;
2472 DECL_IGNORED_P (decl) = 0;
2473 TREE_PUBLIC (decl) = 0;
2474 DECL_UNINLINABLE (decl) = 1;
2475 DECL_EXTERNAL (decl) = 0;
2476 DECL_CONTEXT (decl) = NULL_TREE;
2477 DECL_INITIAL (decl) = make_node (BLOCK);
2478 if (cgraph_node::get (current_function_decl)->offloadable)
2479 cgraph_node::get_create (decl)->offloadable = 1;
2480 else
2481 {
2482 omp_context *octx;
2483 for (octx = ctx; octx; octx = octx->outer)
2484 if (is_gimple_omp_offloaded (octx->stmt))
2485 {
2486 cgraph_node::get_create (decl)->offloadable = 1;
2487 if (ENABLE_OFFLOADING)
2488 g->have_offload = true;
2489
2490 break;
2491 }
2492 }
2493
2494 if (cgraph_node::get_create (decl)->offloadable
2495 && !lookup_attribute ("omp declare target",
2496 DECL_ATTRIBUTES (current_function_decl)))
2497 DECL_ATTRIBUTES (decl)
2498 = tree_cons (get_identifier ("omp target entrypoint"),
2499 NULL_TREE, DECL_ATTRIBUTES (decl));
2500
2501 t = build_decl (DECL_SOURCE_LOCATION (decl),
2502 RESULT_DECL, NULL_TREE, void_type_node);
2503 DECL_ARTIFICIAL (t) = 1;
2504 DECL_IGNORED_P (t) = 1;
2505 DECL_CONTEXT (t) = decl;
2506 DECL_RESULT (decl) = t;
2507
2508 /* _Cilk_for's child function requires two extra parameters called
2509 __low and __high that are set by the Cilk runtime when it calls this
2510 function. */
2511 if (cilk_for_count)
2512 {
2513 t = build_decl (DECL_SOURCE_LOCATION (decl),
2514 PARM_DECL, get_identifier ("__high"), cilk_var_type);
2515 DECL_ARTIFICIAL (t) = 1;
2516 DECL_NAMELESS (t) = 1;
2517 DECL_ARG_TYPE (t) = ptr_type_node;
2518 DECL_CONTEXT (t) = current_function_decl;
2519 TREE_USED (t) = 1;
2520 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2521 DECL_ARGUMENTS (decl) = t;
2522
2523 t = build_decl (DECL_SOURCE_LOCATION (decl),
2524 PARM_DECL, get_identifier ("__low"), cilk_var_type);
2525 DECL_ARTIFICIAL (t) = 1;
2526 DECL_NAMELESS (t) = 1;
2527 DECL_ARG_TYPE (t) = ptr_type_node;
2528 DECL_CONTEXT (t) = current_function_decl;
2529 TREE_USED (t) = 1;
2530 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2531 DECL_ARGUMENTS (decl) = t;
2532 }
2533
2534 tree data_name = get_identifier (".omp_data_i");
2535 t = build_decl (DECL_SOURCE_LOCATION (decl), PARM_DECL, data_name,
2536 ptr_type_node);
2537 DECL_ARTIFICIAL (t) = 1;
2538 DECL_NAMELESS (t) = 1;
2539 DECL_ARG_TYPE (t) = ptr_type_node;
2540 DECL_CONTEXT (t) = current_function_decl;
2541 TREE_USED (t) = 1;
2542 TREE_READONLY (t) = 1;
2543 if (cilk_for_count)
2544 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2545 DECL_ARGUMENTS (decl) = t;
2546 if (!task_copy)
2547 ctx->receiver_decl = t;
2548 else
2549 {
2550 t = build_decl (DECL_SOURCE_LOCATION (decl),
2551 PARM_DECL, get_identifier (".omp_data_o"),
2552 ptr_type_node);
2553 DECL_ARTIFICIAL (t) = 1;
2554 DECL_NAMELESS (t) = 1;
2555 DECL_ARG_TYPE (t) = ptr_type_node;
2556 DECL_CONTEXT (t) = current_function_decl;
2557 TREE_USED (t) = 1;
2558 TREE_ADDRESSABLE (t) = 1;
2559 DECL_CHAIN (t) = DECL_ARGUMENTS (decl);
2560 DECL_ARGUMENTS (decl) = t;
2561 }
2562
2563 /* Allocate memory for the function structure. The call to
2564 allocate_struct_function clobbers CFUN, so we need to restore
2565 it afterward. */
2566 push_struct_function (decl);
2567 cfun->function_end_locus = gimple_location (ctx->stmt);
2568 pop_cfun ();
2569 }
2570
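/* Putting the pieces above together, the outlined decl for a plain parallel
   or target region looks roughly like

     static void foo._omp_fn.0 (void *.omp_data_i);

   whereas a task copy function carries the destination block as well, in
   the argument order built above:

     static void foo._omp_cpyfn.1 (void *.omp_data_o, void *.omp_data_i);

   and the _Cilk_for variant additionally takes the __low and __high
   parameters.  (A sketch only; the names are the ones constructed above,
   the numeric suffixes are examples.)  */
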
2571 /* Callback for walk_gimple_seq. Check whether a combined parallel
2572 contains an OMP_FOR for which gimple_omp_for_combined_into_p is true. */
2573
2574 static tree
2575 find_combined_for (gimple_stmt_iterator *gsi_p,
2576 bool *handled_ops_p,
2577 struct walk_stmt_info *wi)
2578 {
2579 gimple *stmt = gsi_stmt (*gsi_p);
2580
2581 *handled_ops_p = true;
2582 switch (gimple_code (stmt))
2583 {
2584 WALK_SUBSTMTS;
2585
2586 case GIMPLE_OMP_FOR:
2587 if (gimple_omp_for_combined_into_p (stmt)
2588 && gimple_omp_for_kind (stmt)
2589 == *(const enum gf_mask *) (wi->info))
2590 {
2591 wi->info = stmt;
2592 return integer_zero_node;
2593 }
2594 break;
2595 default:
2596 break;
2597 }
2598 return NULL;
2599 }
2600
2601 /* Add _LOOPTEMP_ clauses on OpenMP parallel or task. */
2602
2603 static void
2604 add_taskreg_looptemp_clauses (enum gf_mask msk, gimple *stmt,
2605 omp_context *outer_ctx)
2606 {
2607 struct walk_stmt_info wi;
2608
2609 memset (&wi, 0, sizeof (wi));
2610 wi.val_only = true;
2611 wi.info = (void *) &msk;
2612 walk_gimple_seq (gimple_omp_body (stmt), find_combined_for, NULL, &wi);
2613 if (wi.info != (void *) &msk)
2614 {
2615 gomp_for *for_stmt = as_a <gomp_for *> ((gimple *) wi.info);
2616 struct omp_for_data fd;
2617 extract_omp_for_data (for_stmt, &fd, NULL);
2618 /* We need two temporaries with fd.loop.v type (istart/iend)
2619 and then (fd.collapse - 1) temporaries with the same
2620 type for count2 ... countN-1 vars if not constant. */
2621 size_t count = 2, i;
2622 tree type = fd.iter_type;
2623 if (fd.collapse > 1
2624 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
2625 {
2626 count += fd.collapse - 1;
2627 /* If there are lastprivate clauses on the inner
2628 GIMPLE_OMP_FOR, add one more temporary for the total number
2629 of iterations (product of count1 ... countN-1). */
2630 if (find_omp_clause (gimple_omp_for_clauses (for_stmt),
2631 OMP_CLAUSE_LASTPRIVATE))
2632 count++;
2633 else if (msk == GF_OMP_FOR_KIND_FOR
2634 && find_omp_clause (gimple_omp_parallel_clauses (stmt),
2635 OMP_CLAUSE_LASTPRIVATE))
2636 count++;
2637 }
2638 for (i = 0; i < count; i++)
2639 {
2640 tree temp = create_tmp_var (type);
2641 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__LOOPTEMP_);
2642 insert_decl_map (&outer_ctx->cb, temp, temp);
2643 OMP_CLAUSE_DECL (c) = temp;
2644 OMP_CLAUSE_CHAIN (c) = gimple_omp_taskreg_clauses (stmt);
2645 gimple_omp_taskreg_set_clauses (stmt, c);
2646 }
2647 }
2648 }
2649
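/* Hedged illustration of the counting above: for a combined construct like

     #pragma omp parallel for collapse(2) lastprivate(k)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
	 ...

   with a non-constant iteration count, the walk finds the inner
   GIMPLE_OMP_FOR and attaches count = 2 (istart/iend) + 1 (count2)
   + 1 (total iteration count, because of the lastprivate clause) = 4
   _LOOPTEMP_ clauses to the enclosing parallel.  With constant bounds only
   the two istart/iend temporaries are added.  */
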
2650 /* Scan an OpenMP parallel directive. */
2651
2652 static void
2653 scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
2654 {
2655 omp_context *ctx;
2656 tree name;
2657 gomp_parallel *stmt = as_a <gomp_parallel *> (gsi_stmt (*gsi));
2658
2659 /* Ignore parallel directives with empty bodies, unless there
2660 are copyin clauses. */
2661 if (optimize > 0
2662 && empty_body_p (gimple_omp_body (stmt))
2663 && find_omp_clause (gimple_omp_parallel_clauses (stmt),
2664 OMP_CLAUSE_COPYIN) == NULL)
2665 {
2666 gsi_replace (gsi, gimple_build_nop (), false);
2667 return;
2668 }
2669
2670 if (gimple_omp_parallel_combined_p (stmt))
2671 add_taskreg_looptemp_clauses (GF_OMP_FOR_KIND_FOR, stmt, outer_ctx);
2672
2673 ctx = new_omp_context (stmt, outer_ctx);
2674 taskreg_contexts.safe_push (ctx);
2675 if (taskreg_nesting_level > 1)
2676 ctx->is_nested = true;
2677 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
2678 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
2679 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
2680 name = create_tmp_var_name (".omp_data_s");
2681 name = build_decl (gimple_location (stmt),
2682 TYPE_DECL, name, ctx->record_type);
2683 DECL_ARTIFICIAL (name) = 1;
2684 DECL_NAMELESS (name) = 1;
2685 TYPE_NAME (ctx->record_type) = name;
2686 TYPE_ARTIFICIAL (ctx->record_type) = 1;
2687 if (!gimple_omp_parallel_grid_phony (stmt))
2688 {
2689 create_omp_child_function (ctx, false);
2690 gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn);
2691 }
2692
2693 scan_sharing_clauses (gimple_omp_parallel_clauses (stmt), ctx);
2694 scan_omp (gimple_omp_body_ptr (stmt), ctx);
2695
2696 if (TYPE_FIELDS (ctx->record_type) == NULL)
2697 ctx->record_type = ctx->receiver_decl = NULL;
2698 }
2699
2700 /* Scan an OpenMP task directive. */
2701
2702 static void
2703 scan_omp_task (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
2704 {
2705 omp_context *ctx;
2706 tree name, t;
2707 gomp_task *stmt = as_a <gomp_task *> (gsi_stmt (*gsi));
2708
2709 /* Ignore task directives with empty bodies. */
2710 if (optimize > 0
2711 && empty_body_p (gimple_omp_body (stmt)))
2712 {
2713 gsi_replace (gsi, gimple_build_nop (), false);
2714 return;
2715 }
2716
2717 if (gimple_omp_task_taskloop_p (stmt))
2718 add_taskreg_looptemp_clauses (GF_OMP_FOR_KIND_TASKLOOP, stmt, outer_ctx);
2719
2720 ctx = new_omp_context (stmt, outer_ctx);
2721 taskreg_contexts.safe_push (ctx);
2722 if (taskreg_nesting_level > 1)
2723 ctx->is_nested = true;
2724 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
2725 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
2726 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
2727 name = create_tmp_var_name (".omp_data_s");
2728 name = build_decl (gimple_location (stmt),
2729 TYPE_DECL, name, ctx->record_type);
2730 DECL_ARTIFICIAL (name) = 1;
2731 DECL_NAMELESS (name) = 1;
2732 TYPE_NAME (ctx->record_type) = name;
2733 TYPE_ARTIFICIAL (ctx->record_type) = 1;
2734 create_omp_child_function (ctx, false);
2735 gimple_omp_task_set_child_fn (stmt, ctx->cb.dst_fn);
2736
2737 scan_sharing_clauses (gimple_omp_task_clauses (stmt), ctx);
2738
2739 if (ctx->srecord_type)
2740 {
2741 name = create_tmp_var_name (".omp_data_a");
2742 name = build_decl (gimple_location (stmt),
2743 TYPE_DECL, name, ctx->srecord_type);
2744 DECL_ARTIFICIAL (name) = 1;
2745 DECL_NAMELESS (name) = 1;
2746 TYPE_NAME (ctx->srecord_type) = name;
2747 TYPE_ARTIFICIAL (ctx->srecord_type) = 1;
2748 create_omp_child_function (ctx, true);
2749 }
2750
2751 scan_omp (gimple_omp_body_ptr (stmt), ctx);
2752
2753 if (TYPE_FIELDS (ctx->record_type) == NULL)
2754 {
2755 ctx->record_type = ctx->receiver_decl = NULL;
2756 t = build_int_cst (long_integer_type_node, 0);
2757 gimple_omp_task_set_arg_size (stmt, t);
2758 t = build_int_cst (long_integer_type_node, 1);
2759 gimple_omp_task_set_arg_align (stmt, t);
2760 }
2761 }
2762
2763
2764 /* If any decls have been made addressable during scan_omp,
2765 adjust their fields if needed, and lay out the record types
2766 of parallel/task constructs. */
2767
2768 static void
2769 finish_taskreg_scan (omp_context *ctx)
2770 {
2771 if (ctx->record_type == NULL_TREE)
2772 return;
2773
2774 /* If any task_shared_vars were needed, verify for all
2775 OMP_CLAUSE_SHARED clauses on GIMPLE_OMP_{PARALLEL,TASK}
2776 statements whether use_pointer_for_field has changed
2777 because of that. If it has, update the field types now. */
2778 if (task_shared_vars)
2779 {
2780 tree c;
2781
2782 for (c = gimple_omp_taskreg_clauses (ctx->stmt);
2783 c; c = OMP_CLAUSE_CHAIN (c))
2784 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
2785 && !OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
2786 {
2787 tree decl = OMP_CLAUSE_DECL (c);
2788
2789 /* Global variables don't need to be copied,
2790 the receiver side will use them directly. */
2791 if (is_global_var (maybe_lookup_decl_in_outer_ctx (decl, ctx)))
2792 continue;
2793 if (!bitmap_bit_p (task_shared_vars, DECL_UID (decl))
2794 || !use_pointer_for_field (decl, ctx))
2795 continue;
2796 tree field = lookup_field (decl, ctx);
2797 if (TREE_CODE (TREE_TYPE (field)) == POINTER_TYPE
2798 && TREE_TYPE (TREE_TYPE (field)) == TREE_TYPE (decl))
2799 continue;
2800 TREE_TYPE (field) = build_pointer_type (TREE_TYPE (decl));
2801 TREE_THIS_VOLATILE (field) = 0;
2802 DECL_USER_ALIGN (field) = 0;
2803 DECL_ALIGN (field) = TYPE_ALIGN (TREE_TYPE (field));
2804 if (TYPE_ALIGN (ctx->record_type) < DECL_ALIGN (field))
2805 TYPE_ALIGN (ctx->record_type) = DECL_ALIGN (field);
2806 if (ctx->srecord_type)
2807 {
2808 tree sfield = lookup_sfield (decl, ctx);
2809 TREE_TYPE (sfield) = TREE_TYPE (field);
2810 TREE_THIS_VOLATILE (sfield) = 0;
2811 DECL_USER_ALIGN (sfield) = 0;
2812 DECL_ALIGN (sfield) = DECL_ALIGN (field);
2813 if (TYPE_ALIGN (ctx->srecord_type) < DECL_ALIGN (sfield))
2814 TYPE_ALIGN (ctx->srecord_type) = DECL_ALIGN (sfield);
2815 }
2816 }
2817 }
2818
2819 if (gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL)
2820 {
2821 layout_type (ctx->record_type);
2822 fixup_child_record_type (ctx);
2823 }
2824 else
2825 {
2826 location_t loc = gimple_location (ctx->stmt);
2827 tree *p, vla_fields = NULL_TREE, *q = &vla_fields;
2828 /* Move VLA fields to the end. */
2829 p = &TYPE_FIELDS (ctx->record_type);
2830 while (*p)
2831 if (!TYPE_SIZE_UNIT (TREE_TYPE (*p))
2832 || ! TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (*p))))
2833 {
2834 *q = *p;
2835 *p = TREE_CHAIN (*p);
2836 TREE_CHAIN (*q) = NULL_TREE;
2837 q = &TREE_CHAIN (*q);
2838 }
2839 else
2840 p = &DECL_CHAIN (*p);
2841 *p = vla_fields;
2842 if (gimple_omp_task_taskloop_p (ctx->stmt))
2843 {
2844 /* Move the fields corresponding to the first and second _looptemp_
2845 clauses first. They are filled in by GOMP_taskloop
2846 and thus need to be in specific positions. */
2847 tree c1 = gimple_omp_task_clauses (ctx->stmt);
2848 c1 = find_omp_clause (c1, OMP_CLAUSE__LOOPTEMP_);
2849 tree c2 = find_omp_clause (OMP_CLAUSE_CHAIN (c1),
2850 OMP_CLAUSE__LOOPTEMP_);
2851 tree f1 = lookup_field (OMP_CLAUSE_DECL (c1), ctx);
2852 tree f2 = lookup_field (OMP_CLAUSE_DECL (c2), ctx);
2853 p = &TYPE_FIELDS (ctx->record_type);
2854 while (*p)
2855 if (*p == f1 || *p == f2)
2856 *p = DECL_CHAIN (*p);
2857 else
2858 p = &DECL_CHAIN (*p);
2859 DECL_CHAIN (f1) = f2;
2860 DECL_CHAIN (f2) = TYPE_FIELDS (ctx->record_type);
2861 TYPE_FIELDS (ctx->record_type) = f1;
2862 if (ctx->srecord_type)
2863 {
2864 f1 = lookup_sfield (OMP_CLAUSE_DECL (c1), ctx);
2865 f2 = lookup_sfield (OMP_CLAUSE_DECL (c2), ctx);
2866 p = &TYPE_FIELDS (ctx->srecord_type);
2867 while (*p)
2868 if (*p == f1 || *p == f2)
2869 *p = DECL_CHAIN (*p);
2870 else
2871 p = &DECL_CHAIN (*p);
2872 DECL_CHAIN (f1) = f2;
2873 DECL_CHAIN (f2) = TYPE_FIELDS (ctx->srecord_type);
2874 TYPE_FIELDS (ctx->srecord_type) = f1;
2875 }
2876 }
2877 layout_type (ctx->record_type);
2878 fixup_child_record_type (ctx);
2879 if (ctx->srecord_type)
2880 layout_type (ctx->srecord_type);
2881 tree t = fold_convert_loc (loc, long_integer_type_node,
2882 TYPE_SIZE_UNIT (ctx->record_type));
2883 gimple_omp_task_set_arg_size (ctx->stmt, t);
2884 t = build_int_cst (long_integer_type_node,
2885 TYPE_ALIGN_UNIT (ctx->record_type));
2886 gimple_omp_task_set_arg_align (ctx->stmt, t);
2887 }
2888 }
2889
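/* Two consequences of the layout code above, sketched for illustration:
   fields whose type has no constant size (e.g. a firstprivate VLA on an
   explicit task) are moved behind all fixed-size fields, so the record
   keeps a constant-offset prefix; and for a taskloop the fields of the
   first two _LOOPTEMP_ clauses are forced to be the first two members,
   since GOMP_taskloop stores the per-task start and end iteration there
   before invoking the child function.  */
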
2890 /* Find the enclosing offload context. */
2891
2892 static omp_context *
2893 enclosing_target_ctx (omp_context *ctx)
2894 {
2895 for (; ctx; ctx = ctx->outer)
2896 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TARGET)
2897 break;
2898
2899 return ctx;
2900 }
2901
2902 /* Return true if ctx is part of an oacc kernels region. */
2903
2904 static bool
2905 ctx_in_oacc_kernels_region (omp_context *ctx)
2906 {
2907 for (;ctx != NULL; ctx = ctx->outer)
2908 {
2909 gimple *stmt = ctx->stmt;
2910 if (gimple_code (stmt) == GIMPLE_OMP_TARGET
2911 && gimple_omp_target_kind (stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
2912 return true;
2913 }
2914
2915 return false;
2916 }
2917
2918 /* Check the parallelism clauses inside a kernels region.
2919 Until kernels handling moves to use the same loop indirection
2920 scheme as parallel, we need to do this checking early. */
2921
2922 static unsigned
2923 check_oacc_kernel_gwv (gomp_for *stmt, omp_context *ctx)
2924 {
2925 bool checking = true;
2926 unsigned outer_mask = 0;
2927 unsigned this_mask = 0;
2928 bool has_seq = false, has_auto = false;
2929
2930 if (ctx->outer)
2931 outer_mask = check_oacc_kernel_gwv (NULL, ctx->outer);
2932 if (!stmt)
2933 {
2934 checking = false;
2935 if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR)
2936 return outer_mask;
2937 stmt = as_a <gomp_for *> (ctx->stmt);
2938 }
2939
2940 for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
2941 {
2942 switch (OMP_CLAUSE_CODE (c))
2943 {
2944 case OMP_CLAUSE_GANG:
2945 this_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
2946 break;
2947 case OMP_CLAUSE_WORKER:
2948 this_mask |= GOMP_DIM_MASK (GOMP_DIM_WORKER);
2949 break;
2950 case OMP_CLAUSE_VECTOR:
2951 this_mask |= GOMP_DIM_MASK (GOMP_DIM_VECTOR);
2952 break;
2953 case OMP_CLAUSE_SEQ:
2954 has_seq = true;
2955 break;
2956 case OMP_CLAUSE_AUTO:
2957 has_auto = true;
2958 break;
2959 default:
2960 break;
2961 }
2962 }
2963
2964 if (checking)
2965 {
2966 if (has_seq && (this_mask || has_auto))
2967 error_at (gimple_location (stmt), "%<seq%> overrides other"
2968 " OpenACC loop specifiers");
2969 else if (has_auto && this_mask)
2970 error_at (gimple_location (stmt), "%<auto%> conflicts with other"
2971 " OpenACC loop specifiers");
2972
2973 if (this_mask & outer_mask)
2974 error_at (gimple_location (stmt), "inner loop uses same"
2975 " OpenACC parallelism as containing loop");
2976 }
2977
2978 return outer_mask | this_mask;
2979 }
2980
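/* Example of a nesting the check above rejects (a sketch, not from a
   test case):

     #pragma acc kernels
     {
       #pragma acc loop gang
       for (i = 0; i < n; i++)
	 {
	   #pragma acc loop gang vector
	   for (j = 0; j < m; j++)
	     ...
	 }
     }

   The inner loop's gang clause reuses a level of parallelism already
   claimed by the containing loop, so "inner loop uses same OpenACC
   parallelism as containing loop" is diagnosed; combining seq with other
   specifiers, or auto with gang/worker/vector, is diagnosed likewise.  */
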
2981 /* Scan a GIMPLE_OMP_FOR. */
2982
2983 static void
2984 scan_omp_for (gomp_for *stmt, omp_context *outer_ctx)
2985 {
2986 omp_context *ctx;
2987 size_t i;
2988 tree clauses = gimple_omp_for_clauses (stmt);
2989
2990 ctx = new_omp_context (stmt, outer_ctx);
2991
2992 if (is_gimple_omp_oacc (stmt))
2993 {
2994 omp_context *tgt = enclosing_target_ctx (outer_ctx);
2995
2996 if (!tgt || is_oacc_parallel (tgt))
2997 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
2998 {
2999 char const *check = NULL;
3000
3001 switch (OMP_CLAUSE_CODE (c))
3002 {
3003 case OMP_CLAUSE_GANG:
3004 check = "gang";
3005 break;
3006
3007 case OMP_CLAUSE_WORKER:
3008 check = "worker";
3009 break;
3010
3011 case OMP_CLAUSE_VECTOR:
3012 check = "vector";
3013 break;
3014
3015 default:
3016 break;
3017 }
3018
3019 if (check && OMP_CLAUSE_OPERAND (c, 0))
3020 error_at (gimple_location (stmt),
3021 "argument not permitted on %qs clause in"
3022 " OpenACC %<parallel%>", check);
3023 }
3024
3025 if (tgt && is_oacc_kernels (tgt))
3026 {
3027 /* Strip out reductions, as they are not handled yet. */
3028 tree *prev_ptr = &clauses;
3029
3030 while (tree probe = *prev_ptr)
3031 {
3032 tree *next_ptr = &OMP_CLAUSE_CHAIN (probe);
3033
3034 if (OMP_CLAUSE_CODE (probe) == OMP_CLAUSE_REDUCTION)
3035 *prev_ptr = *next_ptr;
3036 else
3037 prev_ptr = next_ptr;
3038 }
3039
3040 gimple_omp_for_set_clauses (stmt, clauses);
3041 check_oacc_kernel_gwv (stmt, ctx);
3042 }
3043 }
3044
3045 scan_sharing_clauses (clauses, ctx);
3046
3047 scan_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
3048 for (i = 0; i < gimple_omp_for_collapse (stmt); i++)
3049 {
3050 scan_omp_op (gimple_omp_for_index_ptr (stmt, i), ctx);
3051 scan_omp_op (gimple_omp_for_initial_ptr (stmt, i), ctx);
3052 scan_omp_op (gimple_omp_for_final_ptr (stmt, i), ctx);
3053 scan_omp_op (gimple_omp_for_incr_ptr (stmt, i), ctx);
3054 }
3055 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3056 }
3057
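/* For the OpenACC handling above, a minimal sketch of the rejected form:

     #pragma acc parallel
     #pragma acc loop gang(32)
     for (i = 0; i < n; i++)
       ...

   Inside a parallel region (or for an orphaned loop) the gang, worker and
   vector clauses must not carry arguments, so this reports "argument not
   permitted on 'gang' clause in OpenACC 'parallel'".  Inside a kernels
   region, reduction clauses are stripped instead and the gang/worker/vector
   nesting is verified by check_oacc_kernel_gwv.  */
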
3058 /* Scan an OpenMP sections directive. */
3059
3060 static void
3061 scan_omp_sections (gomp_sections *stmt, omp_context *outer_ctx)
3062 {
3063 omp_context *ctx;
3064
3065 ctx = new_omp_context (stmt, outer_ctx);
3066 scan_sharing_clauses (gimple_omp_sections_clauses (stmt), ctx);
3067 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3068 }
3069
3070 /* Scan an OpenMP single directive. */
3071
3072 static void
3073 scan_omp_single (gomp_single *stmt, omp_context *outer_ctx)
3074 {
3075 omp_context *ctx;
3076 tree name;
3077
3078 ctx = new_omp_context (stmt, outer_ctx);
3079 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
3080 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
3081 name = create_tmp_var_name (".omp_copy_s");
3082 name = build_decl (gimple_location (stmt),
3083 TYPE_DECL, name, ctx->record_type);
3084 TYPE_NAME (ctx->record_type) = name;
3085
3086 scan_sharing_clauses (gimple_omp_single_clauses (stmt), ctx);
3087 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3088
3089 if (TYPE_FIELDS (ctx->record_type) == NULL)
3090 ctx->record_type = NULL;
3091 else
3092 layout_type (ctx->record_type);
3093 }
3094
3095 /* Return true if the CLAUSES of an omp target guarantee that the base pointers
3096 used in the corresponding offloaded function are restrict. */
3097
3098 static bool
3099 omp_target_base_pointers_restrict_p (tree clauses)
3100 {
3101 /* The analysis relies on the GOMP_MAP_FORCE_* mapping kinds, which are only
3102 used by OpenACC. */
3103 if (flag_openacc == 0)
3104 return false;
3105
3106 /* I. Basic example:
3107
3108 void foo (void)
3109 {
3110 unsigned int a[2], b[2];
3111
3112 #pragma acc kernels \
3113 copyout (a) \
3114 copyout (b)
3115 {
3116 a[0] = 0;
3117 b[0] = 1;
3118 }
3119 }
3120
3121 After gimplification, we have:
3122
3123 #pragma omp target oacc_kernels \
3124 map(force_from:a [len: 8]) \
3125 map(force_from:b [len: 8])
3126 {
3127 a[0] = 0;
3128 b[0] = 1;
3129 }
3130
3131 Because both mappings have the force prefix, we know that they will be
3132 allocated when calling the corresponding offloaded function, which means we
3133 can mark the base pointers for a and b in the offloaded function as
3134 restrict. */
3135
3136 tree c;
3137 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
3138 {
3139 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
3140 return false;
3141
3142 switch (OMP_CLAUSE_MAP_KIND (c))
3143 {
3144 case GOMP_MAP_FORCE_ALLOC:
3145 case GOMP_MAP_FORCE_TO:
3146 case GOMP_MAP_FORCE_FROM:
3147 case GOMP_MAP_FORCE_TOFROM:
3148 break;
3149 default:
3150 return false;
3151 }
3152 }
3153
3154 return true;
3155 }
3156
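/* Conversely, the analysis above gives up as soon as any clause is not one
   of the force mappings: e.g. a plain GOMP_MAP_TOFROM mapping, or any
   non-MAP clause, makes it return false, since such data may already be
   present on the device and its device address could alias another
   mapping.  (Illustrative reasoning only; the exact clause lists depend on
   the front end.)  */
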
3157 /* Scan a GIMPLE_OMP_TARGET. */
3158
3159 static void
3160 scan_omp_target (gomp_target *stmt, omp_context *outer_ctx)
3161 {
3162 omp_context *ctx;
3163 tree name;
3164 bool offloaded = is_gimple_omp_offloaded (stmt);
3165 tree clauses = gimple_omp_target_clauses (stmt);
3166
3167 ctx = new_omp_context (stmt, outer_ctx);
3168 ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0);
3169 ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED;
3170 ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE);
3171 name = create_tmp_var_name (".omp_data_t");
3172 name = build_decl (gimple_location (stmt),
3173 TYPE_DECL, name, ctx->record_type);
3174 DECL_ARTIFICIAL (name) = 1;
3175 DECL_NAMELESS (name) = 1;
3176 TYPE_NAME (ctx->record_type) = name;
3177 TYPE_ARTIFICIAL (ctx->record_type) = 1;
3178
3179 bool base_pointers_restrict = false;
3180 if (offloaded)
3181 {
3182 create_omp_child_function (ctx, false);
3183 gimple_omp_target_set_child_fn (stmt, ctx->cb.dst_fn);
3184
3185 base_pointers_restrict = omp_target_base_pointers_restrict_p (clauses);
3186 if (base_pointers_restrict
3187 && dump_file && (dump_flags & TDF_DETAILS))
3188 fprintf (dump_file,
3189 "Base pointers in offloaded function are restrict\n");
3190 }
3191
3192 scan_sharing_clauses (clauses, ctx, base_pointers_restrict);
3193 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3194
3195 if (TYPE_FIELDS (ctx->record_type) == NULL)
3196 ctx->record_type = ctx->receiver_decl = NULL;
3197 else
3198 {
3199 TYPE_FIELDS (ctx->record_type)
3200 = nreverse (TYPE_FIELDS (ctx->record_type));
3201 if (flag_checking)
3202 {
3203 unsigned int align = DECL_ALIGN (TYPE_FIELDS (ctx->record_type));
3204 for (tree field = TYPE_FIELDS (ctx->record_type);
3205 field;
3206 field = DECL_CHAIN (field))
3207 gcc_assert (DECL_ALIGN (field) == align);
3208 }
3209 layout_type (ctx->record_type);
3210 if (offloaded)
3211 fixup_child_record_type (ctx);
3212 }
3213 }
3214
3215 /* Scan an OpenMP teams directive. */
3216
3217 static void
3218 scan_omp_teams (gomp_teams *stmt, omp_context *outer_ctx)
3219 {
3220 omp_context *ctx = new_omp_context (stmt, outer_ctx);
3221 scan_sharing_clauses (gimple_omp_teams_clauses (stmt), ctx);
3222 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3223 }
3224
3225 /* Check nesting restrictions. */
3226 static bool
3227 check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx)
3228 {
3229 tree c;
3230
3231 if (ctx && gimple_code (ctx->stmt) == GIMPLE_OMP_GRID_BODY)
3232 /* GRID_BODY is an artificial construct; nesting rules will be checked in
3233 the original copy of its contents. */
3234 return true;
3235
3236 /* No nesting of non-OpenACC STMT (that is, an OpenMP one, or a GOMP builtin)
3237 inside an OpenACC CTX. */
3238 if (!(is_gimple_omp (stmt)
3239 && is_gimple_omp_oacc (stmt)))
3240 {
3241 for (omp_context *octx = ctx; octx != NULL; octx = octx->outer)
3242 if (is_gimple_omp (octx->stmt)
3243 && is_gimple_omp_oacc (octx->stmt)
3244 /* Except for atomic codes that we share with OpenMP. */
3245 && ! (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD
3246 || gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE))
3247 {
3248 error_at (gimple_location (stmt),
3249 "non-OpenACC construct inside of OpenACC region");
3250 return false;
3251 }
3252 }
3253
3254 if (ctx != NULL)
3255 {
3256 if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
3257 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
3258 {
3259 c = NULL_TREE;
3260 if (gimple_code (stmt) == GIMPLE_OMP_ORDERED)
3261 {
3262 c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt));
3263 if (find_omp_clause (c, OMP_CLAUSE_SIMD))
3264 {
3265 if (find_omp_clause (c, OMP_CLAUSE_THREADS)
3266 && (ctx->outer == NULL
3267 || !gimple_omp_for_combined_into_p (ctx->stmt)
3268 || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR
3269 || (gimple_omp_for_kind (ctx->outer->stmt)
3270 != GF_OMP_FOR_KIND_FOR)
3271 || !gimple_omp_for_combined_p (ctx->outer->stmt)))
3272 {
3273 error_at (gimple_location (stmt),
3274 "%<ordered simd threads%> must be closely "
3275 "nested inside of %<for simd%> region");
3276 return false;
3277 }
3278 return true;
3279 }
3280 }
3281 error_at (gimple_location (stmt),
3282 "OpenMP constructs other than %<#pragma omp ordered simd%>"
3283 " may not be nested inside %<simd%> region");
3284 return false;
3285 }
3286 else if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
3287 {
3288 if ((gimple_code (stmt) != GIMPLE_OMP_FOR
3289 || (gimple_omp_for_kind (stmt)
3290 != GF_OMP_FOR_KIND_DISTRIBUTE))
3291 && gimple_code (stmt) != GIMPLE_OMP_PARALLEL)
3292 {
3293 error_at (gimple_location (stmt),
3294 "only %<distribute%> or %<parallel%> regions are "
3295 "allowed to be strictly nested inside %<teams%> "
3296 "region");
3297 return false;
3298 }
3299 }
3300 }
3301 switch (gimple_code (stmt))
3302 {
3303 case GIMPLE_OMP_FOR:
3304 if (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)
3305 return true;
3306 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3307 {
3308 if (ctx != NULL && gimple_code (ctx->stmt) != GIMPLE_OMP_TEAMS)
3309 {
3310 error_at (gimple_location (stmt),
3311 "%<distribute%> region must be strictly nested "
3312 "inside %<teams%> construct");
3313 return false;
3314 }
3315 return true;
3316 }
3317 /* We split a taskloop into a task with a nested taskloop in it. */
3318 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_TASKLOOP)
3319 return true;
3320 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
3321 {
3322 bool ok = false;
3323
3324 if (ctx)
3325 switch (gimple_code (ctx->stmt))
3326 {
3327 case GIMPLE_OMP_FOR:
3328 ok = (gimple_omp_for_kind (ctx->stmt)
3329 == GF_OMP_FOR_KIND_OACC_LOOP);
3330 break;
3331
3332 case GIMPLE_OMP_TARGET:
3333 switch (gimple_omp_target_kind (ctx->stmt))
3334 {
3335 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
3336 case GF_OMP_TARGET_KIND_OACC_KERNELS:
3337 ok = true;
3338 break;
3339
3340 default:
3341 break;
3342 }
3343
3344 default:
3345 break;
3346 }
3347 else if (get_oacc_fn_attrib (current_function_decl))
3348 ok = true;
3349 if (!ok)
3350 {
3351 error_at (gimple_location (stmt),
3352 "OpenACC loop directive must be associated with"
3353 " an OpenACC compute region");
3354 return false;
3355 }
3356 }
3357 /* FALLTHRU */
3358 case GIMPLE_CALL:
3359 if (is_gimple_call (stmt)
3360 && (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3361 == BUILT_IN_GOMP_CANCEL
3362 || DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3363 == BUILT_IN_GOMP_CANCELLATION_POINT))
3364 {
3365 const char *bad = NULL;
3366 const char *kind = NULL;
3367 const char *construct
3368 = (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3369 == BUILT_IN_GOMP_CANCEL)
3370 ? "#pragma omp cancel"
3371 : "#pragma omp cancellation point";
3372 if (ctx == NULL)
3373 {
3374 error_at (gimple_location (stmt), "orphaned %qs construct",
3375 construct);
3376 return false;
3377 }
3378 switch (tree_fits_shwi_p (gimple_call_arg (stmt, 0))
3379 ? tree_to_shwi (gimple_call_arg (stmt, 0))
3380 : 0)
3381 {
3382 case 1:
3383 if (gimple_code (ctx->stmt) != GIMPLE_OMP_PARALLEL)
3384 bad = "#pragma omp parallel";
3385 else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3386 == BUILT_IN_GOMP_CANCEL
3387 && !integer_zerop (gimple_call_arg (stmt, 1)))
3388 ctx->cancellable = true;
3389 kind = "parallel";
3390 break;
3391 case 2:
3392 if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
3393 || gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_FOR)
3394 bad = "#pragma omp for";
3395 else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3396 == BUILT_IN_GOMP_CANCEL
3397 && !integer_zerop (gimple_call_arg (stmt, 1)))
3398 {
3399 ctx->cancellable = true;
3400 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3401 OMP_CLAUSE_NOWAIT))
3402 warning_at (gimple_location (stmt), 0,
3403 "%<#pragma omp cancel for%> inside "
3404 "%<nowait%> for construct");
3405 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3406 OMP_CLAUSE_ORDERED))
3407 warning_at (gimple_location (stmt), 0,
3408 "%<#pragma omp cancel for%> inside "
3409 "%<ordered%> for construct");
3410 }
3411 kind = "for";
3412 break;
3413 case 4:
3414 if (gimple_code (ctx->stmt) != GIMPLE_OMP_SECTIONS
3415 && gimple_code (ctx->stmt) != GIMPLE_OMP_SECTION)
3416 bad = "#pragma omp sections";
3417 else if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3418 == BUILT_IN_GOMP_CANCEL
3419 && !integer_zerop (gimple_call_arg (stmt, 1)))
3420 {
3421 if (gimple_code (ctx->stmt) == GIMPLE_OMP_SECTIONS)
3422 {
3423 ctx->cancellable = true;
3424 if (find_omp_clause (gimple_omp_sections_clauses
3425 (ctx->stmt),
3426 OMP_CLAUSE_NOWAIT))
3427 warning_at (gimple_location (stmt), 0,
3428 "%<#pragma omp cancel sections%> inside "
3429 "%<nowait%> sections construct");
3430 }
3431 else
3432 {
3433 gcc_assert (ctx->outer
3434 && gimple_code (ctx->outer->stmt)
3435 == GIMPLE_OMP_SECTIONS);
3436 ctx->outer->cancellable = true;
3437 if (find_omp_clause (gimple_omp_sections_clauses
3438 (ctx->outer->stmt),
3439 OMP_CLAUSE_NOWAIT))
3440 warning_at (gimple_location (stmt), 0,
3441 "%<#pragma omp cancel sections%> inside "
3442 "%<nowait%> sections construct");
3443 }
3444 }
3445 kind = "sections";
3446 break;
3447 case 8:
3448 if (gimple_code (ctx->stmt) != GIMPLE_OMP_TASK)
3449 bad = "#pragma omp task";
3450 else
3451 {
3452 for (omp_context *octx = ctx->outer;
3453 octx; octx = octx->outer)
3454 {
3455 switch (gimple_code (octx->stmt))
3456 {
3457 case GIMPLE_OMP_TASKGROUP:
3458 break;
3459 case GIMPLE_OMP_TARGET:
3460 if (gimple_omp_target_kind (octx->stmt)
3461 != GF_OMP_TARGET_KIND_REGION)
3462 continue;
3463 /* FALLTHRU */
3464 case GIMPLE_OMP_PARALLEL:
3465 case GIMPLE_OMP_TEAMS:
3466 error_at (gimple_location (stmt),
3467 "%<%s taskgroup%> construct not closely "
3468 "nested inside of %<taskgroup%> region",
3469 construct);
3470 return false;
3471 default:
3472 continue;
3473 }
3474 break;
3475 }
3476 ctx->cancellable = true;
3477 }
3478 kind = "taskgroup";
3479 break;
3480 default:
3481 error_at (gimple_location (stmt), "invalid arguments");
3482 return false;
3483 }
3484 if (bad)
3485 {
3486 error_at (gimple_location (stmt),
3487 "%<%s %s%> construct not closely nested inside of %qs",
3488 construct, kind, bad);
3489 return false;
3490 }
3491 }
3492 /* FALLTHRU */
3493 case GIMPLE_OMP_SECTIONS:
3494 case GIMPLE_OMP_SINGLE:
3495 for (; ctx != NULL; ctx = ctx->outer)
3496 switch (gimple_code (ctx->stmt))
3497 {
3498 case GIMPLE_OMP_FOR:
3499 if (gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_FOR
3500 && gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_TASKLOOP)
3501 break;
3502 /* FALLTHRU */
3503 case GIMPLE_OMP_SECTIONS:
3504 case GIMPLE_OMP_SINGLE:
3505 case GIMPLE_OMP_ORDERED:
3506 case GIMPLE_OMP_MASTER:
3507 case GIMPLE_OMP_TASK:
3508 case GIMPLE_OMP_CRITICAL:
3509 if (is_gimple_call (stmt))
3510 {
3511 if (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))
3512 != BUILT_IN_GOMP_BARRIER)
3513 return true;
3514 error_at (gimple_location (stmt),
3515 "barrier region may not be closely nested inside "
3516 "of work-sharing, %<critical%>, %<ordered%>, "
3517 "%<master%>, explicit %<task%> or %<taskloop%> "
3518 "region");
3519 return false;
3520 }
3521 error_at (gimple_location (stmt),
3522 "work-sharing region may not be closely nested inside "
3523 "of work-sharing, %<critical%>, %<ordered%>, "
3524 "%<master%>, explicit %<task%> or %<taskloop%> region");
3525 return false;
3526 case GIMPLE_OMP_PARALLEL:
3527 case GIMPLE_OMP_TEAMS:
3528 return true;
3529 case GIMPLE_OMP_TARGET:
3530 if (gimple_omp_target_kind (ctx->stmt)
3531 == GF_OMP_TARGET_KIND_REGION)
3532 return true;
3533 break;
3534 default:
3535 break;
3536 }
3537 break;
3538 case GIMPLE_OMP_MASTER:
3539 for (; ctx != NULL; ctx = ctx->outer)
3540 switch (gimple_code (ctx->stmt))
3541 {
3542 case GIMPLE_OMP_FOR:
3543 if (gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_FOR
3544 && gimple_omp_for_kind (ctx->stmt) != GF_OMP_FOR_KIND_TASKLOOP)
3545 break;
3546 /* FALLTHRU */
3547 case GIMPLE_OMP_SECTIONS:
3548 case GIMPLE_OMP_SINGLE:
3549 case GIMPLE_OMP_TASK:
3550 error_at (gimple_location (stmt),
3551 "%<master%> region may not be closely nested inside "
3552 "of work-sharing, explicit %<task%> or %<taskloop%> "
3553 "region");
3554 return false;
3555 case GIMPLE_OMP_PARALLEL:
3556 case GIMPLE_OMP_TEAMS:
3557 return true;
3558 case GIMPLE_OMP_TARGET:
3559 if (gimple_omp_target_kind (ctx->stmt)
3560 == GF_OMP_TARGET_KIND_REGION)
3561 return true;
3562 break;
3563 default:
3564 break;
3565 }
3566 break;
3567 case GIMPLE_OMP_TASK:
3568 for (c = gimple_omp_task_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
3569 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
3570 && (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE
3571 || OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK))
3572 {
3573 enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
3574 error_at (OMP_CLAUSE_LOCATION (c),
3575 "%<depend(%s)%> is only allowed in %<omp ordered%>",
3576 kind == OMP_CLAUSE_DEPEND_SOURCE ? "source" : "sink");
3577 return false;
3578 }
3579 break;
3580 case GIMPLE_OMP_ORDERED:
3581 for (c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt));
3582 c; c = OMP_CLAUSE_CHAIN (c))
3583 {
3584 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND)
3585 {
3586 gcc_assert (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_THREADS
3587 || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SIMD);
3588 continue;
3589 }
3590 enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
3591 if (kind == OMP_CLAUSE_DEPEND_SOURCE
3592 || kind == OMP_CLAUSE_DEPEND_SINK)
3593 {
3594 tree oclause;
3595 /* Look for containing ordered(N) loop. */
3596 if (ctx == NULL
3597 || gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
3598 || (oclause
3599 = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3600 OMP_CLAUSE_ORDERED)) == NULL_TREE)
3601 {
3602 error_at (OMP_CLAUSE_LOCATION (c),
3603 "%<ordered%> construct with %<depend%> clause "
3604 "must be closely nested inside an %<ordered%> "
3605 "loop");
3606 return false;
3607 }
3608 else if (OMP_CLAUSE_ORDERED_EXPR (oclause) == NULL_TREE)
3609 {
3610 error_at (OMP_CLAUSE_LOCATION (c),
3611 "%<ordered%> construct with %<depend%> clause "
3612 "must be closely nested inside a loop with "
3613 "%<ordered%> clause with a parameter");
3614 return false;
3615 }
3616 }
3617 else
3618 {
3619 error_at (OMP_CLAUSE_LOCATION (c),
3620 "invalid depend kind in omp %<ordered%> %<depend%>");
3621 return false;
3622 }
3623 }
3624 c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt));
3625 if (find_omp_clause (c, OMP_CLAUSE_SIMD))
3626 {
3627 /* An ordered simd must be closely nested inside of a simd region,
3628 and a simd region must not encounter constructs other than
3629 ordered simd; therefore an ordered simd is either orphaned,
3630 or ctx->stmt must be a simd. The latter case has already been
3631 handled earlier. */
3632 if (ctx != NULL)
3633 {
3634 error_at (gimple_location (stmt),
3635 "%<ordered%> %<simd%> must be closely nested inside "
3636 "%<simd%> region");
3637 return false;
3638 }
3639 }
3640 for (; ctx != NULL; ctx = ctx->outer)
3641 switch (gimple_code (ctx->stmt))
3642 {
3643 case GIMPLE_OMP_CRITICAL:
3644 case GIMPLE_OMP_TASK:
3645 case GIMPLE_OMP_ORDERED:
3646 ordered_in_taskloop:
3647 error_at (gimple_location (stmt),
3648 "%<ordered%> region may not be closely nested inside "
3649 "of %<critical%>, %<ordered%>, explicit %<task%> or "
3650 "%<taskloop%> region");
3651 return false;
3652 case GIMPLE_OMP_FOR:
3653 if (gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_TASKLOOP)
3654 goto ordered_in_taskloop;
3655 if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
3656 OMP_CLAUSE_ORDERED) == NULL)
3657 {
3658 error_at (gimple_location (stmt),
3659 "%<ordered%> region must be closely nested inside "
3660 "a loop region with an %<ordered%> clause");
3661 return false;
3662 }
3663 return true;
3664 case GIMPLE_OMP_TARGET:
3665 if (gimple_omp_target_kind (ctx->stmt)
3666 != GF_OMP_TARGET_KIND_REGION)
3667 break;
3668 /* FALLTHRU */
3669 case GIMPLE_OMP_PARALLEL:
3670 case GIMPLE_OMP_TEAMS:
3671 error_at (gimple_location (stmt),
3672 "%<ordered%> region must be closely nested inside "
3673 "a loop region with an %<ordered%> clause");
3674 return false;
3675 default:
3676 break;
3677 }
3678 break;
3679 case GIMPLE_OMP_CRITICAL:
3680 {
3681 tree this_stmt_name
3682 = gimple_omp_critical_name (as_a <gomp_critical *> (stmt));
3683 for (; ctx != NULL; ctx = ctx->outer)
3684 if (gomp_critical *other_crit
3685 = dyn_cast <gomp_critical *> (ctx->stmt))
3686 if (this_stmt_name == gimple_omp_critical_name (other_crit))
3687 {
3688 error_at (gimple_location (stmt),
3689 "%<critical%> region may not be nested inside "
3690 "a %<critical%> region with the same name");
3691 return false;
3692 }
3693 }
3694 break;
3695 case GIMPLE_OMP_TEAMS:
3696 if (ctx == NULL
3697 || gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET
3698 || gimple_omp_target_kind (ctx->stmt) != GF_OMP_TARGET_KIND_REGION)
3699 {
3700 error_at (gimple_location (stmt),
3701 "%<teams%> construct not closely nested inside of "
3702 "%<target%> construct");
3703 return false;
3704 }
3705 break;
3706 case GIMPLE_OMP_TARGET:
3707 for (c = gimple_omp_target_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
3708 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
3709 && (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE
3710 || OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK))
3711 {
3712 enum omp_clause_depend_kind kind = OMP_CLAUSE_DEPEND_KIND (c);
3713 error_at (OMP_CLAUSE_LOCATION (c),
3714 "%<depend(%s)%> is only allowed in %<omp ordered%>",
3715 kind == OMP_CLAUSE_DEPEND_SOURCE ? "source" : "sink");
3716 return false;
3717 }
3718 for (; ctx != NULL; ctx = ctx->outer)
3719 {
3720 if (gimple_code (ctx->stmt) != GIMPLE_OMP_TARGET)
3721 {
3722 if (is_gimple_omp (stmt)
3723 && is_gimple_omp_oacc (stmt)
3724 && is_gimple_omp (ctx->stmt))
3725 {
3726 error_at (gimple_location (stmt),
3727 "OpenACC construct inside of non-OpenACC region");
3728 return false;
3729 }
3730 continue;
3731 }
3732
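/* Translate the GIMPLE_OMP_TARGET kind of STMT and of the enclosing
   target region into the user-visible construct names used in the
   diagnostics below.  */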
3733 const char *stmt_name, *ctx_stmt_name;
3734 switch (gimple_omp_target_kind (stmt))
3735 {
3736 case GF_OMP_TARGET_KIND_REGION: stmt_name = "target"; break;
3737 case GF_OMP_TARGET_KIND_DATA: stmt_name = "target data"; break;
3738 case GF_OMP_TARGET_KIND_UPDATE: stmt_name = "target update"; break;
3739 case GF_OMP_TARGET_KIND_ENTER_DATA:
3740 stmt_name = "target enter data"; break;
3741 case GF_OMP_TARGET_KIND_EXIT_DATA:
3742 stmt_name = "target exit data"; break;
3743 case GF_OMP_TARGET_KIND_OACC_PARALLEL: stmt_name = "parallel"; break;
3744 case GF_OMP_TARGET_KIND_OACC_KERNELS: stmt_name = "kernels"; break;
3745 case GF_OMP_TARGET_KIND_OACC_DATA: stmt_name = "data"; break;
3746 case GF_OMP_TARGET_KIND_OACC_UPDATE: stmt_name = "update"; break;
3747 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
3748 stmt_name = "enter/exit data"; break;
3749 case GF_OMP_TARGET_KIND_OACC_HOST_DATA: stmt_name = "host_data";
3750 break;
3751 default: gcc_unreachable ();
3752 }
3753 switch (gimple_omp_target_kind (ctx->stmt))
3754 {
3755 case GF_OMP_TARGET_KIND_REGION: ctx_stmt_name = "target"; break;
3756 case GF_OMP_TARGET_KIND_DATA: ctx_stmt_name = "target data"; break;
3757 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
3758 ctx_stmt_name = "parallel"; break;
3759 case GF_OMP_TARGET_KIND_OACC_KERNELS:
3760 ctx_stmt_name = "kernels"; break;
3761 case GF_OMP_TARGET_KIND_OACC_DATA: ctx_stmt_name = "data"; break;
3762 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
3763 ctx_stmt_name = "host_data"; break;
3764 default: gcc_unreachable ();
3765 }
3766
3767 /* OpenACC/OpenMP mismatch? */
3768 if (is_gimple_omp_oacc (stmt)
3769 != is_gimple_omp_oacc (ctx->stmt))
3770 {
3771 error_at (gimple_location (stmt),
3772 "%s %qs construct inside of %s %qs region",
3773 (is_gimple_omp_oacc (stmt)
3774 ? "OpenACC" : "OpenMP"), stmt_name,
3775 (is_gimple_omp_oacc (ctx->stmt)
3776 ? "OpenACC" : "OpenMP"), ctx_stmt_name);
3777 return false;
3778 }
3779 if (is_gimple_omp_offloaded (ctx->stmt))
3780 {
3781 /* No GIMPLE_OMP_TARGET inside offloaded OpenACC CTX. */
3782 if (is_gimple_omp_oacc (ctx->stmt))
3783 {
3784 error_at (gimple_location (stmt),
3785 "%qs construct inside of %qs region",
3786 stmt_name, ctx_stmt_name);
3787 return false;
3788 }
3789 else
3790 {
3791 warning_at (gimple_location (stmt), 0,
3792 "%qs construct inside of %qs region",
3793 stmt_name, ctx_stmt_name);
3794 }
3795 }
3796 }
3797 break;
3798 default:
3799 break;
3800 }
3801 return true;
3802 }
3803
3804
3805 /* Helper function for scan_omp.
3806 
3807 Callback for walk_tree, and for operands via walk_gimple_stmt, used to
3808 scan for OMP directives in TP. */
3809
3810 static tree
3811 scan_omp_1_op (tree *tp, int *walk_subtrees, void *data)
3812 {
3813 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
3814 omp_context *ctx = (omp_context *) wi->info;
3815 tree t = *tp;
3816
3817 switch (TREE_CODE (t))
3818 {
3819 case VAR_DECL:
3820 case PARM_DECL:
3821 case LABEL_DECL:
3822 case RESULT_DECL:
3823 if (ctx)
3824 {
3825 tree repl = remap_decl (t, &ctx->cb);
3826 gcc_checking_assert (TREE_CODE (repl) != ERROR_MARK);
3827 *tp = repl;
3828 }
3829 break;
3830
3831 default:
3832 if (ctx && TYPE_P (t))
3833 *tp = remap_type (t, &ctx->cb);
3834 else if (!DECL_P (t))
3835 {
3836 *walk_subtrees = 1;
3837 if (ctx)
3838 {
3839 tree tem = remap_type (TREE_TYPE (t), &ctx->cb);
3840 if (tem != TREE_TYPE (t))
3841 {
3842 if (TREE_CODE (t) == INTEGER_CST)
3843 *tp = wide_int_to_tree (tem, t);
3844 else
3845 TREE_TYPE (t) = tem;
3846 }
3847 }
3848 }
3849 break;
3850 }
3851
3852 return NULL_TREE;
3853 }
3854
3855 /* Return true if FNDECL is a setjmp or a longjmp. */
3856
3857 static bool
3858 setjmp_or_longjmp_p (const_tree fndecl)
3859 {
3860 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
3861 && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_SETJMP
3862 || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LONGJMP))
3863 return true;
3864
3865 tree declname = DECL_NAME (fndecl);
3866 if (!declname)
3867 return false;
3868 const char *name = IDENTIFIER_POINTER (declname);
3869 return !strcmp (name, "setjmp") || !strcmp (name, "longjmp");
3870 }
3871
3872
3873 /* Helper function for scan_omp.
3874
3875 Callback for walk_gimple_stmt used to scan for OMP directives in
3876 the current statement in GSI. */
3877
3878 static tree
3879 scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p,
3880 struct walk_stmt_info *wi)
3881 {
3882 gimple *stmt = gsi_stmt (*gsi);
3883 omp_context *ctx = (omp_context *) wi->info;
3884
3885 if (gimple_has_location (stmt))
3886 input_location = gimple_location (stmt);
3887
3888 /* Check the nesting restrictions. */
3889 bool remove = false;
3890 if (is_gimple_omp (stmt))
3891 remove = !check_omp_nesting_restrictions (stmt, ctx);
3892 else if (is_gimple_call (stmt))
3893 {
3894 tree fndecl = gimple_call_fndecl (stmt);
3895 if (fndecl)
3896 {
3897 if (setjmp_or_longjmp_p (fndecl)
3898 && ctx
3899 && gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
3900 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
3901 {
3902 remove = true;
3903 error_at (gimple_location (stmt),
3904 "setjmp/longjmp inside simd construct");
3905 }
3906 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
3907 switch (DECL_FUNCTION_CODE (fndecl))
3908 {
3909 case BUILT_IN_GOMP_BARRIER:
3910 case BUILT_IN_GOMP_CANCEL:
3911 case BUILT_IN_GOMP_CANCELLATION_POINT:
3912 case BUILT_IN_GOMP_TASKYIELD:
3913 case BUILT_IN_GOMP_TASKWAIT:
3914 case BUILT_IN_GOMP_TASKGROUP_START:
3915 case BUILT_IN_GOMP_TASKGROUP_END:
3916 remove = !check_omp_nesting_restrictions (stmt, ctx);
3917 break;
3918 default:
3919 break;
3920 }
3921 }
3922 }
3923 if (remove)
3924 {
3925 stmt = gimple_build_nop ();
3926 gsi_replace (gsi, stmt, false);
3927 }
3928
3929 *handled_ops_p = true;
3930
3931 switch (gimple_code (stmt))
3932 {
3933 case GIMPLE_OMP_PARALLEL:
3934 taskreg_nesting_level++;
3935 scan_omp_parallel (gsi, ctx);
3936 taskreg_nesting_level--;
3937 break;
3938
3939 case GIMPLE_OMP_TASK:
3940 taskreg_nesting_level++;
3941 scan_omp_task (gsi, ctx);
3942 taskreg_nesting_level--;
3943 break;
3944
3945 case GIMPLE_OMP_FOR:
3946 scan_omp_for (as_a <gomp_for *> (stmt), ctx);
3947 break;
3948
3949 case GIMPLE_OMP_SECTIONS:
3950 scan_omp_sections (as_a <gomp_sections *> (stmt), ctx);
3951 break;
3952
3953 case GIMPLE_OMP_SINGLE:
3954 scan_omp_single (as_a <gomp_single *> (stmt), ctx);
3955 break;
3956
3957 case GIMPLE_OMP_SECTION:
3958 case GIMPLE_OMP_MASTER:
3959 case GIMPLE_OMP_TASKGROUP:
3960 case GIMPLE_OMP_ORDERED:
3961 case GIMPLE_OMP_CRITICAL:
3962 case GIMPLE_OMP_GRID_BODY:
3963 ctx = new_omp_context (stmt, ctx);
3964 scan_omp (gimple_omp_body_ptr (stmt), ctx);
3965 break;
3966
3967 case GIMPLE_OMP_TARGET:
3968 scan_omp_target (as_a <gomp_target *> (stmt), ctx);
3969 break;
3970
3971 case GIMPLE_OMP_TEAMS:
3972 scan_omp_teams (as_a <gomp_teams *> (stmt), ctx);
3973 break;
3974
3975 case GIMPLE_BIND:
3976 {
3977 tree var;
3978
3979 *handled_ops_p = false;
3980 if (ctx)
3981 for (var = gimple_bind_vars (as_a <gbind *> (stmt));
3982 var ;
3983 var = DECL_CHAIN (var))
3984 insert_decl_map (&ctx->cb, var, var);
3985 }
3986 break;
3987 default:
3988 *handled_ops_p = false;
3989 break;
3990 }
3991
3992 return NULL_TREE;
3993 }
3994
3995
3996 /* Scan all the statements starting at the current statement. CTX
3997 contains context information about the OMP directives and
3998 clauses found during the scan. */
3999
4000 static void
4001 scan_omp (gimple_seq *body_p, omp_context *ctx)
4002 {
4003 location_t saved_location;
4004 struct walk_stmt_info wi;
4005
4006 memset (&wi, 0, sizeof (wi));
4007 wi.info = ctx;
4008 wi.want_locations = true;
4009
4010 saved_location = input_location;
4011 walk_gimple_seq_mod (body_p, scan_omp_1_stmt, scan_omp_1_op, &wi);
4012 input_location = saved_location;
4013 }
4014 \f
4015 /* Re-gimplification and code generation routines. */
4016
4017 /* Build a call to GOMP_barrier. */
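/* If LHS is non-NULL, a call to GOMP_barrier_cancel is built instead and
   its boolean result (whether cancellation has been observed) is stored
   into LHS.  */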
4018
4019 static gimple *
4020 build_omp_barrier (tree lhs)
4021 {
4022 tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL
4023 : BUILT_IN_GOMP_BARRIER);
4024 gcall *g = gimple_build_call (fndecl, 0);
4025 if (lhs)
4026 gimple_call_set_lhs (g, lhs);
4027 return g;
4028 }
4029
4030 /* If a context was created for STMT when it was scanned, return it. */
4031
4032 static omp_context *
4033 maybe_lookup_ctx (gimple *stmt)
4034 {
4035 splay_tree_node n;
4036 n = splay_tree_lookup (all_contexts, (splay_tree_key) stmt);
4037 return n ? (omp_context *) n->value : NULL;
4038 }
4039
4040
4041 /* Find the mapping for DECL in CTX or the immediately enclosing
4042 context that has a mapping for DECL.
4043
4044 If CTX is a nested parallel directive, we may have to use the decl
4045 mappings created in CTX's parent context. Suppose that we have the
4046 following parallel nesting (variable UIDs shown for clarity):
4047
4048 iD.1562 = 0;
4049 #omp parallel shared(iD.1562) -> outer parallel
4050 iD.1562 = iD.1562 + 1;
4051
4052 #omp parallel shared (iD.1562) -> inner parallel
4053 iD.1562 = iD.1562 - 1;
4054
4055 Each parallel structure will create a distinct .omp_data_s structure
4056 for copying iD.1562 in/out of the directive:
4057
4058 outer parallel .omp_data_s.1.i -> iD.1562
4059 inner parallel .omp_data_s.2.i -> iD.1562
4060
4061 A shared variable mapping will produce a copy-out operation before
4062 the parallel directive and a copy-in operation after it. So, in
4063 this case we would have:
4064
4065 iD.1562 = 0;
4066 .omp_data_o.1.i = iD.1562;
4067 #omp parallel shared(iD.1562) -> outer parallel
4068 .omp_data_i.1 = &.omp_data_o.1
4069 .omp_data_i.1->i = .omp_data_i.1->i + 1;
4070
4071 .omp_data_o.2.i = iD.1562; -> **
4072 #omp parallel shared(iD.1562) -> inner parallel
4073 .omp_data_i.2 = &.omp_data_o.2
4074 .omp_data_i.2->i = .omp_data_i.2->i - 1;
4075
4076
4077 ** This is a problem. The symbol iD.1562 cannot be referenced
4078 inside the body of the outer parallel region. But since we are
4079 emitting this copy operation while expanding the inner parallel
4080 directive, we need to access the CTX structure of the outer
4081 parallel directive to get the correct mapping:
4082
4083 .omp_data_o.2.i = .omp_data_i.1->i
4084
4085 Since there may be other workshare or parallel directives enclosing
4086 the parallel directive, it may be necessary to walk up the context
4087 parent chain. This is not a problem in general because nested
4088 parallelism happens only rarely. */
4089
4090 static tree
4091 lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
4092 {
4093 tree t;
4094 omp_context *up;
4095
4096 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
4097 t = maybe_lookup_decl (decl, up);
4098
4099 gcc_assert (!ctx->is_nested || t || is_global_var (decl));
4100
4101 return t ? t : decl;
4102 }
4103
4104
4105 /* Similar to lookup_decl_in_outer_ctx, but return DECL if not found
4106 in outer contexts. */
4107
4108 static tree
4109 maybe_lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
4110 {
4111 tree t = NULL;
4112 omp_context *up;
4113
4114 for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
4115 t = maybe_lookup_decl (decl, up);
4116
4117 return t ? t : decl;
4118 }
4119
4120
4121 /* Construct the initialization value for reduction operation OP. */
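/* E.g. 0 for +, -, |, ^ and ||; 1 for *, && and ==; ~0 (all bits set)
   for &; the type's minimum value (or -inf) for max; and its maximum
   value (or +inf) for min.  */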
4122
4123 tree
4124 omp_reduction_init_op (location_t loc, enum tree_code op, tree type)
4125 {
4126 switch (op)
4127 {
4128 case PLUS_EXPR:
4129 case MINUS_EXPR:
4130 case BIT_IOR_EXPR:
4131 case BIT_XOR_EXPR:
4132 case TRUTH_OR_EXPR:
4133 case TRUTH_ORIF_EXPR:
4134 case TRUTH_XOR_EXPR:
4135 case NE_EXPR:
4136 return build_zero_cst (type);
4137
4138 case MULT_EXPR:
4139 case TRUTH_AND_EXPR:
4140 case TRUTH_ANDIF_EXPR:
4141 case EQ_EXPR:
4142 return fold_convert_loc (loc, type, integer_one_node);
4143
4144 case BIT_AND_EXPR:
4145 return fold_convert_loc (loc, type, integer_minus_one_node);
4146
4147 case MAX_EXPR:
4148 if (SCALAR_FLOAT_TYPE_P (type))
4149 {
4150 REAL_VALUE_TYPE max, min;
4151 if (HONOR_INFINITIES (type))
4152 {
4153 real_inf (&max);
4154 real_arithmetic (&min, NEGATE_EXPR, &max, NULL);
4155 }
4156 else
4157 real_maxval (&min, 1, TYPE_MODE (type));
4158 return build_real (type, min);
4159 }
4160 else if (POINTER_TYPE_P (type))
4161 {
4162 wide_int min
4163 = wi::min_value (TYPE_PRECISION (type), TYPE_SIGN (type));
4164 return wide_int_to_tree (type, min);
4165 }
4166 else
4167 {
4168 gcc_assert (INTEGRAL_TYPE_P (type));
4169 return TYPE_MIN_VALUE (type);
4170 }
4171
4172 case MIN_EXPR:
4173 if (SCALAR_FLOAT_TYPE_P (type))
4174 {
4175 REAL_VALUE_TYPE max;
4176 if (HONOR_INFINITIES (type))
4177 real_inf (&max);
4178 else
4179 real_maxval (&max, 0, TYPE_MODE (type));
4180 return build_real (type, max);
4181 }
4182 else if (POINTER_TYPE_P (type))
4183 {
4184 wide_int max
4185 = wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type));
4186 return wide_int_to_tree (type, max);
4187 }
4188 else
4189 {
4190 gcc_assert (INTEGRAL_TYPE_P (type));
4191 return TYPE_MAX_VALUE (type);
4192 }
4193
4194 default:
4195 gcc_unreachable ();
4196 }
4197 }
4198
4199 /* Construct the initialization value for reduction CLAUSE. */
4200
4201 tree
4202 omp_reduction_init (tree clause, tree type)
4203 {
4204 return omp_reduction_init_op (OMP_CLAUSE_LOCATION (clause),
4205 OMP_CLAUSE_REDUCTION_CODE (clause), type);
4206 }
4207
4208 /* Return alignment to be assumed for var in CLAUSE, which should be
4209 OMP_CLAUSE_ALIGNED. */
4210
4211 static tree
4212 omp_clause_aligned_alignment (tree clause)
4213 {
4214 if (OMP_CLAUSE_ALIGNED_ALIGNMENT (clause))
4215 return OMP_CLAUSE_ALIGNED_ALIGNMENT (clause);
4216
4217 /* Otherwise return an implementation-defined alignment. */
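/* Scan the integer and floating-point element modes the target can
   vectorize, take the preferred vector mode for each, widen it up to
   the largest autovectorization vector size, and use the maximum unit
   alignment of the resulting vector types.  */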
4218 unsigned int al = 1;
4219 machine_mode mode, vmode;
4220 int vs = targetm.vectorize.autovectorize_vector_sizes ();
4221 if (vs)
4222 vs = 1 << floor_log2 (vs);
4223 static enum mode_class classes[]
4224 = { MODE_INT, MODE_VECTOR_INT, MODE_FLOAT, MODE_VECTOR_FLOAT };
4225 for (int i = 0; i < 4; i += 2)
4226 for (mode = GET_CLASS_NARROWEST_MODE (classes[i]);
4227 mode != VOIDmode;
4228 mode = GET_MODE_WIDER_MODE (mode))
4229 {
4230 vmode = targetm.vectorize.preferred_simd_mode (mode);
4231 if (GET_MODE_CLASS (vmode) != classes[i + 1])
4232 continue;
4233 while (vs
4234 && GET_MODE_SIZE (vmode) < vs
4235 && GET_MODE_2XWIDER_MODE (vmode) != VOIDmode)
4236 vmode = GET_MODE_2XWIDER_MODE (vmode);
4237
4238 tree type = lang_hooks.types.type_for_mode (mode, 1);
4239 if (type == NULL_TREE || TYPE_MODE (type) != mode)
4240 continue;
4241 type = build_vector_type (type, GET_MODE_SIZE (vmode)
4242 / GET_MODE_SIZE (mode));
4243 if (TYPE_MODE (type) != vmode)
4244 continue;
4245 if (TYPE_ALIGN_UNIT (type) > al)
4246 al = TYPE_ALIGN_UNIT (type);
4247 }
4248 return build_int_cst (integer_type_node, al);
4249 }
4250
4251 /* Return maximum possible vectorization factor for the target. */
4252
4253 static int
4254 omp_max_vf (void)
4255 {
4256 if (!optimize
4257 || optimize_debug
4258 || !flag_tree_loop_optimize
4259 || (!flag_tree_loop_vectorize
4260 && (global_options_set.x_flag_tree_loop_vectorize
4261 || global_options_set.x_flag_tree_vectorize)))
4262 return 1;
4263
4264 int vs = targetm.vectorize.autovectorize_vector_sizes ();
4265 if (vs)
4266 {
4267 vs = 1 << floor_log2 (vs);
4268 return vs;
4269 }
4270 machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
4271 if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
4272 return GET_MODE_NUNITS (vqimode);
4273 return 1;
4274 }
4275
4276 /* Helper function of lower_rec_input_clauses, used for #pragma omp simd
4277 privatization. */
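/* When a vectorization factor greater than one can be used, each such
   privatized scalar is backed by a per-lane "omp simd array"; roughly
   (illustrative sketch only):

	T priv;			becomes		T priv_array[max_vf];
	... priv ...				... priv_array[idx] ...

   IVAR is the array element indexed by IDX, LVAR the element indexed by
   LANE, and NEW_VAR's DECL_VALUE_EXPR is pointed at LVAR so existing
   references keep working.  Returns false when max_vf of 1 is forced.  */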
4278
4279 static bool
4280 lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, int &max_vf,
4281 tree &idx, tree &lane, tree &ivar, tree &lvar)
4282 {
4283 if (max_vf == 0)
4284 {
4285 max_vf = omp_max_vf ();
4286 if (max_vf > 1)
4287 {
4288 tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
4289 OMP_CLAUSE_SAFELEN);
4290 if (c && TREE_CODE (OMP_CLAUSE_SAFELEN_EXPR (c)) != INTEGER_CST)
4291 max_vf = 1;
4292 else if (c && compare_tree_int (OMP_CLAUSE_SAFELEN_EXPR (c),
4293 max_vf) == -1)
4294 max_vf = tree_to_shwi (OMP_CLAUSE_SAFELEN_EXPR (c));
4295 }
4296 if (max_vf > 1)
4297 {
4298 idx = create_tmp_var (unsigned_type_node);
4299 lane = create_tmp_var (unsigned_type_node);
4300 }
4301 }
4302 if (max_vf == 1)
4303 return false;
4304
4305 tree atype = build_array_type_nelts (TREE_TYPE (new_var), max_vf);
4306 tree avar = create_tmp_var_raw (atype);
4307 if (TREE_ADDRESSABLE (new_var))
4308 TREE_ADDRESSABLE (avar) = 1;
4309 DECL_ATTRIBUTES (avar)
4310 = tree_cons (get_identifier ("omp simd array"), NULL,
4311 DECL_ATTRIBUTES (avar));
4312 gimple_add_tmp_var (avar);
4313 ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, idx,
4314 NULL_TREE, NULL_TREE);
4315 lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, lane,
4316 NULL_TREE, NULL_TREE);
4317 if (DECL_P (new_var))
4318 {
4319 SET_DECL_VALUE_EXPR (new_var, lvar);
4320 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4321 }
4322 return true;
4323 }
4324
4325 /* Helper function of lower_rec_input_clauses. For a reference used in
4326 a simd reduction, create the underlying variable that it will reference. */
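/* Only done when the referenced type has a constant size: a named,
   addressable temporary of that type is created and NEW_VARD is made
   to point at it, so the privatized reference has backing storage.  */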
4327
4328 static void
4329 handle_simd_reference (location_t loc, tree new_vard, gimple_seq *ilist)
4330 {
4331 tree z = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_vard)));
4332 if (TREE_CONSTANT (z))
4333 {
4334 z = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_vard)),
4335 get_name (new_vard));
4336 gimple_add_tmp_var (z);
4337 TREE_ADDRESSABLE (z) = 1;
4338 z = build_fold_addr_expr_loc (loc, z);
4339 gimplify_assign (new_vard, z, ilist);
4340 }
4341 }
4342
4343 /* Generate code to implement the input clauses, FIRSTPRIVATE and COPYIN,
4344 from the receiver (aka child) side and initializers for REFERENCE_TYPE
4345 private variables. Initialization statements go in ILIST, while calls
4346 to destructors go in DLIST. */
4347
4348 static void
4349 lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
4350 omp_context *ctx, struct omp_for_data *fd)
4351 {
4352 tree c, dtor, copyin_seq, x, ptr;
4353 bool copyin_by_ref = false;
4354 bool lastprivate_firstprivate = false;
4355 bool reduction_omp_orig_ref = false;
4356 int pass;
4357 bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
4358 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
4359 int max_vf = 0;
4360 tree lane = NULL_TREE, idx = NULL_TREE;
4361 tree ivar = NULL_TREE, lvar = NULL_TREE;
4362 gimple_seq llist[2] = { NULL, NULL };
4363
4364 copyin_seq = NULL;
4365
4366 /* Set max_vf=1 (which will later enforce safelen=1) in simd loops
4367 with data sharing clauses referencing variable sized vars. That
4368 is unnecessarily hard to support and very unlikely to result in
4369 vectorized code anyway. */
4370 if (is_simd)
4371 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
4372 switch (OMP_CLAUSE_CODE (c))
4373 {
4374 case OMP_CLAUSE_LINEAR:
4375 if (OMP_CLAUSE_LINEAR_ARRAY (c))
4376 max_vf = 1;
4377 /* FALLTHRU */
4378 case OMP_CLAUSE_PRIVATE:
4379 case OMP_CLAUSE_FIRSTPRIVATE:
4380 case OMP_CLAUSE_LASTPRIVATE:
4381 if (is_variable_sized (OMP_CLAUSE_DECL (c)))
4382 max_vf = 1;
4383 break;
4384 case OMP_CLAUSE_REDUCTION:
4385 if (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF
4386 || is_variable_sized (OMP_CLAUSE_DECL (c)))
4387 max_vf = 1;
4388 break;
4389 default:
4390 continue;
4391 }
4392
4393 /* Do all the fixed sized types in the first pass, and the variable sized
4394 types in the second pass. This makes sure that the scalar arguments to
4395 the variable sized types are processed before we use them in the
4396 variable sized operations. */
4397 for (pass = 0; pass < 2; ++pass)
4398 {
4399 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
4400 {
4401 enum omp_clause_code c_kind = OMP_CLAUSE_CODE (c);
4402 tree var, new_var;
4403 bool by_ref;
4404 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
4405
4406 switch (c_kind)
4407 {
4408 case OMP_CLAUSE_PRIVATE:
4409 if (OMP_CLAUSE_PRIVATE_DEBUG (c))
4410 continue;
4411 break;
4412 case OMP_CLAUSE_SHARED:
4413 /* Ignore shared directives in teams construct. */
4414 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
4415 continue;
4416 if (maybe_lookup_decl (OMP_CLAUSE_DECL (c), ctx) == NULL)
4417 {
4418 gcc_assert (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c)
4419 || is_global_var (OMP_CLAUSE_DECL (c)));
4420 continue;
4421 }
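/* FALLTHRU */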
4422 case OMP_CLAUSE_FIRSTPRIVATE:
4423 case OMP_CLAUSE_COPYIN:
4424 break;
4425 case OMP_CLAUSE_LINEAR:
4426 if (!OMP_CLAUSE_LINEAR_NO_COPYIN (c)
4427 && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c))
4428 lastprivate_firstprivate = true;
4429 break;
4430 case OMP_CLAUSE_REDUCTION:
4431 if (OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
4432 reduction_omp_orig_ref = true;
4433 break;
4434 case OMP_CLAUSE__LOOPTEMP_:
4435 /* Handle _looptemp_ clauses only on parallel/task. */
4436 if (fd)
4437 continue;
4438 break;
4439 case OMP_CLAUSE_LASTPRIVATE:
4440 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
4441 {
4442 lastprivate_firstprivate = true;
4443 if (pass != 0 || is_taskloop_ctx (ctx))
4444 continue;
4445 }
4446 /* Even without corresponding firstprivate, if
4447 decl is Fortran allocatable, it needs outer var
4448 reference. */
4449 else if (pass == 0
4450 && lang_hooks.decls.omp_private_outer_ref
4451 (OMP_CLAUSE_DECL (c)))
4452 lastprivate_firstprivate = true;
4453 break;
4454 case OMP_CLAUSE_ALIGNED:
4455 if (pass == 0)
4456 continue;
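/* Two cases below: a non-global pointer variable has its privatized
   copy passed through __builtin_assume_aligned; a global array has its
   address assume-aligned into a pointer temporary, and the local decl
   gets a DECL_VALUE_EXPR dereferencing that pointer.  */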
4457 var = OMP_CLAUSE_DECL (c);
4458 if (TREE_CODE (TREE_TYPE (var)) == POINTER_TYPE
4459 && !is_global_var (var))
4460 {
4461 new_var = maybe_lookup_decl (var, ctx);
4462 if (new_var == NULL_TREE)
4463 new_var = maybe_lookup_decl_in_outer_ctx (var, ctx);
4464 x = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
4465 x = build_call_expr_loc (clause_loc, x, 2, new_var,
4466 omp_clause_aligned_alignment (c));
4467 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
4468 x = build2 (MODIFY_EXPR, TREE_TYPE (new_var), new_var, x);
4469 gimplify_and_add (x, ilist);
4470 }
4471 else if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE
4472 && is_global_var (var))
4473 {
4474 tree ptype = build_pointer_type (TREE_TYPE (var)), t, t2;
4475 new_var = lookup_decl (var, ctx);
4476 t = maybe_lookup_decl_in_outer_ctx (var, ctx);
4477 t = build_fold_addr_expr_loc (clause_loc, t);
4478 t2 = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
4479 t = build_call_expr_loc (clause_loc, t2, 2, t,
4480 omp_clause_aligned_alignment (c));
4481 t = fold_convert_loc (clause_loc, ptype, t);
4482 x = create_tmp_var (ptype);
4483 t = build2 (MODIFY_EXPR, ptype, x, t);
4484 gimplify_and_add (t, ilist);
4485 t = build_simple_mem_ref_loc (clause_loc, x);
4486 SET_DECL_VALUE_EXPR (new_var, t);
4487 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4488 }
4489 continue;
4490 default:
4491 continue;
4492 }
4493
4494 new_var = var = OMP_CLAUSE_DECL (c);
4495 if (c_kind == OMP_CLAUSE_REDUCTION && TREE_CODE (var) == MEM_REF)
4496 {
4497 var = TREE_OPERAND (var, 0);
4498 if (TREE_CODE (var) == POINTER_PLUS_EXPR)
4499 var = TREE_OPERAND (var, 0);
4500 if (TREE_CODE (var) == INDIRECT_REF
4501 || TREE_CODE (var) == ADDR_EXPR)
4502 var = TREE_OPERAND (var, 0);
4503 if (is_variable_sized (var))
4504 {
4505 gcc_assert (DECL_HAS_VALUE_EXPR_P (var));
4506 var = DECL_VALUE_EXPR (var);
4507 gcc_assert (TREE_CODE (var) == INDIRECT_REF);
4508 var = TREE_OPERAND (var, 0);
4509 gcc_assert (DECL_P (var));
4510 }
4511 new_var = var;
4512 }
4513 if (c_kind != OMP_CLAUSE_COPYIN)
4514 new_var = lookup_decl (var, ctx);
4515
4516 if (c_kind == OMP_CLAUSE_SHARED || c_kind == OMP_CLAUSE_COPYIN)
4517 {
4518 if (pass != 0)
4519 continue;
4520 }
4521 /* C/C++ array section reductions. */
4522 else if (c_kind == OMP_CLAUSE_REDUCTION
4523 && var != OMP_CLAUSE_DECL (c))
4524 {
4525 if (pass == 0)
4526 continue;
4527
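/* General scheme: allocate a private array covering the section (using
   alloca when its length is not constant), adjust the address by any
   recorded bias, then emit a loop in ILIST that initializes each element
   with the reduction initializer and, for simd or UDR reductions, a
   second loop in DLIST that merges the private elements back into the
   original.  */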
4528 tree bias = TREE_OPERAND (OMP_CLAUSE_DECL (c), 1);
4529 tree orig_var = TREE_OPERAND (OMP_CLAUSE_DECL (c), 0);
4530 if (TREE_CODE (orig_var) == POINTER_PLUS_EXPR)
4531 {
4532 tree b = TREE_OPERAND (orig_var, 1);
4533 b = maybe_lookup_decl (b, ctx);
4534 if (b == NULL)
4535 {
4536 b = TREE_OPERAND (orig_var, 1);
4537 b = maybe_lookup_decl_in_outer_ctx (b, ctx);
4538 }
4539 if (integer_zerop (bias))
4540 bias = b;
4541 else
4542 {
4543 bias = fold_convert_loc (clause_loc,
4544 TREE_TYPE (b), bias);
4545 bias = fold_build2_loc (clause_loc, PLUS_EXPR,
4546 TREE_TYPE (b), b, bias);
4547 }
4548 orig_var = TREE_OPERAND (orig_var, 0);
4549 }
4550 if (TREE_CODE (orig_var) == INDIRECT_REF
4551 || TREE_CODE (orig_var) == ADDR_EXPR)
4552 orig_var = TREE_OPERAND (orig_var, 0);
4553 tree d = OMP_CLAUSE_DECL (c);
4554 tree type = TREE_TYPE (d);
4555 gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
4556 tree v = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
4557 const char *name = get_name (orig_var);
4558 if (TREE_CONSTANT (v))
4559 {
4560 x = create_tmp_var_raw (type, name);
4561 gimple_add_tmp_var (x);
4562 TREE_ADDRESSABLE (x) = 1;
4563 x = build_fold_addr_expr_loc (clause_loc, x);
4564 }
4565 else
4566 {
4567 tree atmp
4568 = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4569 tree t = maybe_lookup_decl (v, ctx);
4570 if (t)
4571 v = t;
4572 else
4573 v = maybe_lookup_decl_in_outer_ctx (v, ctx);
4574 gimplify_expr (&v, ilist, NULL, is_gimple_val, fb_rvalue);
4575 t = fold_build2_loc (clause_loc, PLUS_EXPR,
4576 TREE_TYPE (v), v,
4577 build_int_cst (TREE_TYPE (v), 1));
4578 t = fold_build2_loc (clause_loc, MULT_EXPR,
4579 TREE_TYPE (v), t,
4580 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4581 tree al = size_int (TYPE_ALIGN (TREE_TYPE (type)));
4582 x = build_call_expr_loc (clause_loc, atmp, 2, t, al);
4583 }
4584
4585 tree ptype = build_pointer_type (TREE_TYPE (type));
4586 x = fold_convert_loc (clause_loc, ptype, x);
4587 tree y = create_tmp_var (ptype, name);
4588 gimplify_assign (y, x, ilist);
4589 x = y;
4590 tree yb = y;
4591
4592 if (!integer_zerop (bias))
4593 {
4594 bias = fold_convert_loc (clause_loc, pointer_sized_int_node,
4595 bias);
4596 yb = fold_convert_loc (clause_loc, pointer_sized_int_node,
4597 x);
4598 yb = fold_build2_loc (clause_loc, MINUS_EXPR,
4599 pointer_sized_int_node, yb, bias);
4600 x = fold_convert_loc (clause_loc, TREE_TYPE (x), yb);
4601 yb = create_tmp_var (ptype, name);
4602 gimplify_assign (yb, x, ilist);
4603 x = yb;
4604 }
4605
4606 d = TREE_OPERAND (d, 0);
4607 if (TREE_CODE (d) == POINTER_PLUS_EXPR)
4608 d = TREE_OPERAND (d, 0);
4609 if (TREE_CODE (d) == ADDR_EXPR)
4610 {
4611 if (orig_var != var)
4612 {
4613 gcc_assert (is_variable_sized (orig_var));
4614 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var),
4615 x);
4616 gimplify_assign (new_var, x, ilist);
4617 tree new_orig_var = lookup_decl (orig_var, ctx);
4618 tree t = build_fold_indirect_ref (new_var);
4619 DECL_IGNORED_P (new_var) = 0;
4620 TREE_THIS_NOTRAP (t) = 1;
4621 SET_DECL_VALUE_EXPR (new_orig_var, t);
4622 DECL_HAS_VALUE_EXPR_P (new_orig_var) = 1;
4623 }
4624 else
4625 {
4626 x = build2 (MEM_REF, TREE_TYPE (new_var), x,
4627 build_int_cst (ptype, 0));
4628 SET_DECL_VALUE_EXPR (new_var, x);
4629 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4630 }
4631 }
4632 else
4633 {
4634 gcc_assert (orig_var == var);
4635 if (TREE_CODE (d) == INDIRECT_REF)
4636 {
4637 x = create_tmp_var (ptype, name);
4638 TREE_ADDRESSABLE (x) = 1;
4639 gimplify_assign (x, yb, ilist);
4640 x = build_fold_addr_expr_loc (clause_loc, x);
4641 }
4642 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
4643 gimplify_assign (new_var, x, ilist);
4644 }
4645 tree y1 = create_tmp_var (ptype, NULL);
4646 gimplify_assign (y1, y, ilist);
4647 tree i2 = NULL_TREE, y2 = NULL_TREE;
4648 tree body2 = NULL_TREE, end2 = NULL_TREE;
4649 tree y3 = NULL_TREE, y4 = NULL_TREE;
4650 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) || is_simd)
4651 {
4652 y2 = create_tmp_var (ptype, NULL);
4653 gimplify_assign (y2, y, ilist);
4654 tree ref = build_outer_var_ref (var, ctx);
4655 /* For references, build_outer_var_ref already performs this. */
4656 if (TREE_CODE (d) == INDIRECT_REF)
4657 gcc_assert (is_reference (var));
4658 else if (TREE_CODE (d) == ADDR_EXPR)
4659 ref = build_fold_addr_expr (ref);
4660 else if (is_reference (var))
4661 ref = build_fold_addr_expr (ref);
4662 ref = fold_convert_loc (clause_loc, ptype, ref);
4663 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
4664 && OMP_CLAUSE_REDUCTION_OMP_ORIG_REF (c))
4665 {
4666 y3 = create_tmp_var (ptype, NULL);
4667 gimplify_assign (y3, unshare_expr (ref), ilist);
4668 }
4669 if (is_simd)
4670 {
4671 y4 = create_tmp_var (ptype, NULL);
4672 gimplify_assign (y4, ref, dlist);
4673 }
4674 }
4675 tree i = create_tmp_var (TREE_TYPE (v), NULL);
4676 gimplify_assign (i, build_int_cst (TREE_TYPE (v), 0), ilist);
4677 tree body = create_artificial_label (UNKNOWN_LOCATION);
4678 tree end = create_artificial_label (UNKNOWN_LOCATION);
4679 gimple_seq_add_stmt (ilist, gimple_build_label (body));
4680 if (y2)
4681 {
4682 i2 = create_tmp_var (TREE_TYPE (v), NULL);
4683 gimplify_assign (i2, build_int_cst (TREE_TYPE (v), 0), dlist);
4684 body2 = create_artificial_label (UNKNOWN_LOCATION);
4685 end2 = create_artificial_label (UNKNOWN_LOCATION);
4686 gimple_seq_add_stmt (dlist, gimple_build_label (body2));
4687 }
4688 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
4689 {
4690 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
4691 tree decl_placeholder
4692 = OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c);
4693 SET_DECL_VALUE_EXPR (decl_placeholder,
4694 build_simple_mem_ref (y1));
4695 DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 1;
4696 SET_DECL_VALUE_EXPR (placeholder,
4697 y3 ? build_simple_mem_ref (y3)
4698 : error_mark_node);
4699 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
4700 x = lang_hooks.decls.omp_clause_default_ctor
4701 (c, build_simple_mem_ref (y1),
4702 y3 ? build_simple_mem_ref (y3) : NULL_TREE);
4703 if (x)
4704 gimplify_and_add (x, ilist);
4705 if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
4706 {
4707 gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
4708 lower_omp (&tseq, ctx);
4709 gimple_seq_add_seq (ilist, tseq);
4710 }
4711 OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
4712 if (is_simd)
4713 {
4714 SET_DECL_VALUE_EXPR (decl_placeholder,
4715 build_simple_mem_ref (y2));
4716 SET_DECL_VALUE_EXPR (placeholder,
4717 build_simple_mem_ref (y4));
4718 gimple_seq tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
4719 lower_omp (&tseq, ctx);
4720 gimple_seq_add_seq (dlist, tseq);
4721 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
4722 }
4723 DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
4724 DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 0;
4725 x = lang_hooks.decls.omp_clause_dtor
4726 (c, build_simple_mem_ref (y2));
4727 if (x)
4728 {
4729 gimple_seq tseq = NULL;
4730 dtor = x;
4731 gimplify_stmt (&dtor, &tseq);
4732 gimple_seq_add_seq (dlist, tseq);
4733 }
4734 }
4735 else
4736 {
4737 x = omp_reduction_init (c, TREE_TYPE (type));
4738 enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
4739
4740 /* reduction(-:var) sums up the partial results, so it
4741 acts identically to reduction(+:var). */
4742 if (code == MINUS_EXPR)
4743 code = PLUS_EXPR;
4744
4745 gimplify_assign (build_simple_mem_ref (y1), x, ilist);
4746 if (is_simd)
4747 {
4748 x = build2 (code, TREE_TYPE (type),
4749 build_simple_mem_ref (y4),
4750 build_simple_mem_ref (y2));
4751 gimplify_assign (build_simple_mem_ref (y4), x, dlist);
4752 }
4753 }
4754 gimple *g
4755 = gimple_build_assign (y1, POINTER_PLUS_EXPR, y1,
4756 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4757 gimple_seq_add_stmt (ilist, g);
4758 if (y3)
4759 {
4760 g = gimple_build_assign (y3, POINTER_PLUS_EXPR, y3,
4761 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4762 gimple_seq_add_stmt (ilist, g);
4763 }
4764 g = gimple_build_assign (i, PLUS_EXPR, i,
4765 build_int_cst (TREE_TYPE (i), 1));
4766 gimple_seq_add_stmt (ilist, g);
4767 g = gimple_build_cond (LE_EXPR, i, v, body, end);
4768 gimple_seq_add_stmt (ilist, g);
4769 gimple_seq_add_stmt (ilist, gimple_build_label (end));
4770 if (y2)
4771 {
4772 g = gimple_build_assign (y2, POINTER_PLUS_EXPR, y2,
4773 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4774 gimple_seq_add_stmt (dlist, g);
4775 if (y4)
4776 {
4777 g = gimple_build_assign
4778 (y4, POINTER_PLUS_EXPR, y4,
4779 TYPE_SIZE_UNIT (TREE_TYPE (type)));
4780 gimple_seq_add_stmt (dlist, g);
4781 }
4782 g = gimple_build_assign (i2, PLUS_EXPR, i2,
4783 build_int_cst (TREE_TYPE (i2), 1));
4784 gimple_seq_add_stmt (dlist, g);
4785 g = gimple_build_cond (LE_EXPR, i2, v, body2, end2);
4786 gimple_seq_add_stmt (dlist, g);
4787 gimple_seq_add_stmt (dlist, gimple_build_label (end2));
4788 }
4789 continue;
4790 }
4791 else if (is_variable_sized (var))
4792 {
4793 /* For variable sized types, we need to allocate the
4794 actual storage here. Call alloca and store the
4795 result in the pointer decl that we created elsewhere. */
4796 if (pass == 0)
4797 continue;
4798
4799 if (c_kind != OMP_CLAUSE_FIRSTPRIVATE || !is_task_ctx (ctx))
4800 {
4801 gcall *stmt;
4802 tree tmp, atmp;
4803
4804 ptr = DECL_VALUE_EXPR (new_var);
4805 gcc_assert (TREE_CODE (ptr) == INDIRECT_REF);
4806 ptr = TREE_OPERAND (ptr, 0);
4807 gcc_assert (DECL_P (ptr));
4808 x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
4809
4810 /* void *tmp = __builtin_alloca_with_align (size, DECL_ALIGN (var)); */
4811 atmp = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4812 stmt = gimple_build_call (atmp, 2, x,
4813 size_int (DECL_ALIGN (var)));
4814 tmp = create_tmp_var_raw (ptr_type_node);
4815 gimple_add_tmp_var (tmp);
4816 gimple_call_set_lhs (stmt, tmp);
4817
4818 gimple_seq_add_stmt (ilist, stmt);
4819
4820 x = fold_convert_loc (clause_loc, TREE_TYPE (ptr), tmp);
4821 gimplify_assign (ptr, x, ilist);
4822 }
4823 }
4824 else if (is_reference (var) && !is_oacc_parallel (ctx))
4825 {
4826 /* For references that are being privatized for Fortran,
4827 allocate new backing storage for the new pointer
4828 variable. This allows us to avoid changing all the
4829 code that expects a pointer to something that expects
4830 a direct variable. */
4831 if (pass == 0)
4832 continue;
4833
4834 x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
4835 if (c_kind == OMP_CLAUSE_FIRSTPRIVATE && is_task_ctx (ctx))
4836 {
4837 x = build_receiver_ref (var, false, ctx);
4838 x = build_fold_addr_expr_loc (clause_loc, x);
4839 }
4840 else if (TREE_CONSTANT (x))
4841 {
4842 /* For a reduction in a SIMD loop, defer adding the
4843 initialization of the reference, because if we decide
4844 to use a SIMD array for it, the initialization could
4845 cause an expansion ICE. */
4846 if (c_kind == OMP_CLAUSE_REDUCTION && is_simd)
4847 x = NULL_TREE;
4848 else
4849 {
4850 x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
4851 get_name (var));
4852 gimple_add_tmp_var (x);
4853 TREE_ADDRESSABLE (x) = 1;
4854 x = build_fold_addr_expr_loc (clause_loc, x);
4855 }
4856 }
4857 else
4858 {
4859 tree atmp
4860 = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
4861 tree rtype = TREE_TYPE (TREE_TYPE (new_var));
4862 tree al = size_int (TYPE_ALIGN (rtype));
4863 x = build_call_expr_loc (clause_loc, atmp, 2, x, al);
4864 }
4865
4866 if (x)
4867 {
4868 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
4869 gimplify_assign (new_var, x, ilist);
4870 }
4871
4872 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
4873 }
4874 else if (c_kind == OMP_CLAUSE_REDUCTION
4875 && OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
4876 {
4877 if (pass == 0)
4878 continue;
4879 }
4880 else if (pass != 0)
4881 continue;
4882
4883 switch (OMP_CLAUSE_CODE (c))
4884 {
4885 case OMP_CLAUSE_SHARED:
4886 /* Ignore shared directives in teams construct. */
4887 if (gimple_code (ctx->stmt) == GIMPLE_OMP_TEAMS)
4888 continue;
4889 /* Shared global vars are just accessed directly. */
4890 if (is_global_var (new_var))
4891 break;
4892 /* For taskloop firstprivate/lastprivate, represented
4893 as firstprivate and shared clause on the task, new_var
4894 is the firstprivate var. */
4895 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
4896 break;
4897 /* Set up the DECL_VALUE_EXPR for shared variables now. This
4898 needs to be delayed until after fixup_child_record_type so
4899 that we get the correct type during the dereference. */
4900 by_ref = use_pointer_for_field (var, ctx);
4901 x = build_receiver_ref (var, by_ref, ctx);
4902 SET_DECL_VALUE_EXPR (new_var, x);
4903 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
4904
4905 /* ??? If VAR is not passed by reference, and the variable
4906 hasn't been initialized yet, then we'll get a warning for
4907 the store into the omp_data_s structure. Ideally, we'd be
4908 able to notice this and not store anything at all, but
4909 we're generating code too early. Suppress the warning. */
4910 if (!by_ref)
4911 TREE_NO_WARNING (var) = 1;
4912 break;
4913
4914 case OMP_CLAUSE_LASTPRIVATE:
4915 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
4916 break;
4917 /* FALLTHRU */
4918
4919 case OMP_CLAUSE_PRIVATE:
4920 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_PRIVATE)
4921 x = build_outer_var_ref (var, ctx);
4922 else if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
4923 {
4924 if (is_task_ctx (ctx))
4925 x = build_receiver_ref (var, false, ctx);
4926 else
4927 x = build_outer_var_ref (var, ctx);
4928 }
4929 else
4930 x = NULL;
4931 do_private:
4932 tree nx;
4933 nx = lang_hooks.decls.omp_clause_default_ctor
4934 (c, unshare_expr (new_var), x);
4935 if (is_simd)
4936 {
4937 tree y = lang_hooks.decls.omp_clause_dtor (c, new_var);
4938 if ((TREE_ADDRESSABLE (new_var) || nx || y
4939 || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
4940 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
4941 idx, lane, ivar, lvar))
4942 {
4943 if (nx)
4944 x = lang_hooks.decls.omp_clause_default_ctor
4945 (c, unshare_expr (ivar), x);
4946 if (nx && x)
4947 gimplify_and_add (x, &llist[0]);
4948 if (y)
4949 {
4950 y = lang_hooks.decls.omp_clause_dtor (c, ivar);
4951 if (y)
4952 {
4953 gimple_seq tseq = NULL;
4954
4955 dtor = y;
4956 gimplify_stmt (&dtor, &tseq);
4957 gimple_seq_add_seq (&llist[1], tseq);
4958 }
4959 }
4960 break;
4961 }
4962 }
4963 if (nx)
4964 gimplify_and_add (nx, ilist);
4965 /* FALLTHRU */
4966
4967 do_dtor:
4968 x = lang_hooks.decls.omp_clause_dtor (c, new_var);
4969 if (x)
4970 {
4971 gimple_seq tseq = NULL;
4972
4973 dtor = x;
4974 gimplify_stmt (&dtor, &tseq);
4975 gimple_seq_add_seq (dlist, tseq);
4976 }
4977 break;
4978
4979 case OMP_CLAUSE_LINEAR:
4980 if (!OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4981 goto do_firstprivate;
4982 if (OMP_CLAUSE_LINEAR_NO_COPYOUT (c))
4983 x = NULL;
4984 else
4985 x = build_outer_var_ref (var, ctx);
4986 goto do_private;
4987
4988 case OMP_CLAUSE_FIRSTPRIVATE:
4989 if (is_task_ctx (ctx))
4990 {
4991 if (is_reference (var) || is_variable_sized (var))
4992 goto do_dtor;
4993 else if (is_global_var (maybe_lookup_decl_in_outer_ctx (var,
4994 ctx))
4995 || use_pointer_for_field (var, NULL))
4996 {
4997 x = build_receiver_ref (var, false, ctx);
4998 SET_DECL_VALUE_EXPR (new_var, x);
4999 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
5000 goto do_dtor;
5001 }
5002 }
5003 do_firstprivate:
5004 x = build_outer_var_ref (var, ctx);
5005 if (is_simd)
5006 {
5007 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5008 && gimple_omp_for_combined_into_p (ctx->stmt))
5009 {
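/* This linear clause sits on a construct combined into an enclosing
   one; the _looptemp_ clause carries this construct's starting
   iteration, so advance the initial value of the privatized copy by
   ((looptemp - n1) / step) linear steps, as computed below.  */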
5010 tree t = OMP_CLAUSE_LINEAR_STEP (c);
5011 tree stept = TREE_TYPE (t);
5012 tree ct = find_omp_clause (clauses,
5013 OMP_CLAUSE__LOOPTEMP_);
5014 gcc_assert (ct);
5015 tree l = OMP_CLAUSE_DECL (ct);
5016 tree n1 = fd->loop.n1;
5017 tree step = fd->loop.step;
5018 tree itype = TREE_TYPE (l);
5019 if (POINTER_TYPE_P (itype))
5020 itype = signed_type_for (itype);
5021 l = fold_build2 (MINUS_EXPR, itype, l, n1);
5022 if (TYPE_UNSIGNED (itype)
5023 && fd->loop.cond_code == GT_EXPR)
5024 l = fold_build2 (TRUNC_DIV_EXPR, itype,
5025 fold_build1 (NEGATE_EXPR, itype, l),
5026 fold_build1 (NEGATE_EXPR,
5027 itype, step));
5028 else
5029 l = fold_build2 (TRUNC_DIV_EXPR, itype, l, step);
5030 t = fold_build2 (MULT_EXPR, stept,
5031 fold_convert (stept, l), t);
5032
5033 if (OMP_CLAUSE_LINEAR_ARRAY (c))
5034 {
5035 x = lang_hooks.decls.omp_clause_linear_ctor
5036 (c, new_var, x, t);
5037 gimplify_and_add (x, ilist);
5038 goto do_dtor;
5039 }
5040
5041 if (POINTER_TYPE_P (TREE_TYPE (x)))
5042 x = fold_build2 (POINTER_PLUS_EXPR,
5043 TREE_TYPE (x), x, t);
5044 else
5045 x = fold_build2 (PLUS_EXPR, TREE_TYPE (x), x, t);
5046 }
5047
5048 if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_LINEAR
5049 || TREE_ADDRESSABLE (new_var))
5050 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
5051 idx, lane, ivar, lvar))
5052 {
5053 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR)
5054 {
5055 tree iv = create_tmp_var (TREE_TYPE (new_var));
5056 x = lang_hooks.decls.omp_clause_copy_ctor (c, iv, x);
5057 gimplify_and_add (x, ilist);
5058 gimple_stmt_iterator gsi
5059 = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
5060 gassign *g
5061 = gimple_build_assign (unshare_expr (lvar), iv);
5062 gsi_insert_before_without_update (&gsi, g,
5063 GSI_SAME_STMT);
5064 tree t = OMP_CLAUSE_LINEAR_STEP (c);
5065 enum tree_code code = PLUS_EXPR;
5066 if (POINTER_TYPE_P (TREE_TYPE (new_var)))
5067 code = POINTER_PLUS_EXPR;
5068 g = gimple_build_assign (iv, code, iv, t);
5069 gsi_insert_before_without_update (&gsi, g,
5070 GSI_SAME_STMT);
5071 break;
5072 }
5073 x = lang_hooks.decls.omp_clause_copy_ctor
5074 (c, unshare_expr (ivar), x);
5075 gimplify_and_add (x, &llist[0]);
5076 x = lang_hooks.decls.omp_clause_dtor (c, ivar);
5077 if (x)
5078 {
5079 gimple_seq tseq = NULL;
5080
5081 dtor = x;
5082 gimplify_stmt (&dtor, &tseq);
5083 gimple_seq_add_seq (&llist[1], tseq);
5084 }
5085 break;
5086 }
5087 }
5088 x = lang_hooks.decls.omp_clause_copy_ctor
5089 (c, unshare_expr (new_var), x);
5090 gimplify_and_add (x, ilist);
5091 goto do_dtor;
5092
5093 case OMP_CLAUSE__LOOPTEMP_:
5094 gcc_assert (is_taskreg_ctx (ctx));
5095 x = build_outer_var_ref (var, ctx);
5096 x = build2 (MODIFY_EXPR, TREE_TYPE (new_var), new_var, x);
5097 gimplify_and_add (x, ilist);
5098 break;
5099
5100 case OMP_CLAUSE_COPYIN:
5101 by_ref = use_pointer_for_field (var, NULL);
5102 x = build_receiver_ref (var, by_ref, ctx);
5103 x = lang_hooks.decls.omp_clause_assign_op (c, new_var, x);
5104 append_to_statement_list (x, &copyin_seq);
5105 copyin_by_ref |= by_ref;
5106 break;
5107
5108 case OMP_CLAUSE_REDUCTION:
5109 /* OpenACC reductions are initialized using the
5110 GOACC_REDUCTION internal function. */
5111 if (is_gimple_omp_oacc (ctx->stmt))
5112 break;
5113 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
5114 {
5115 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
5116 gimple *tseq;
5117 x = build_outer_var_ref (var, ctx);
5118
5119 if (is_reference (var)
5120 && !useless_type_conversion_p (TREE_TYPE (placeholder),
5121 TREE_TYPE (x)))
5122 x = build_fold_addr_expr_loc (clause_loc, x);
5123 SET_DECL_VALUE_EXPR (placeholder, x);
5124 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
5125 tree new_vard = new_var;
5126 if (is_reference (var))
5127 {
5128 gcc_assert (TREE_CODE (new_var) == MEM_REF);
5129 new_vard = TREE_OPERAND (new_var, 0);
5130 gcc_assert (DECL_P (new_vard));
5131 }
5132 if (is_simd
5133 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
5134 idx, lane, ivar, lvar))
5135 {
5136 if (new_vard == new_var)
5137 {
5138 gcc_assert (DECL_VALUE_EXPR (new_var) == lvar);
5139 SET_DECL_VALUE_EXPR (new_var, ivar);
5140 }
5141 else
5142 {
5143 SET_DECL_VALUE_EXPR (new_vard,
5144 build_fold_addr_expr (ivar));
5145 DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
5146 }
5147 x = lang_hooks.decls.omp_clause_default_ctor
5148 (c, unshare_expr (ivar),
5149 build_outer_var_ref (var, ctx));
5150 if (x)
5151 gimplify_and_add (x, &llist[0]);
5152 if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
5153 {
5154 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
5155 lower_omp (&tseq, ctx);
5156 gimple_seq_add_seq (&llist[0], tseq);
5157 }
5158 OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
5159 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
5160 lower_omp (&tseq, ctx);
5161 gimple_seq_add_seq (&llist[1], tseq);
5162 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
5163 DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
5164 if (new_vard == new_var)
5165 SET_DECL_VALUE_EXPR (new_var, lvar);
5166 else
5167 SET_DECL_VALUE_EXPR (new_vard,
5168 build_fold_addr_expr (lvar));
5169 x = lang_hooks.decls.omp_clause_dtor (c, ivar);
5170 if (x)
5171 {
5172 tseq = NULL;
5173 dtor = x;
5174 gimplify_stmt (&dtor, &tseq);
5175 gimple_seq_add_seq (&llist[1], tseq);
5176 }
5177 break;
5178 }
5179 /* If this is a reference to constant size reduction var
5180 with placeholder, we haven't emitted the initializer
5181 for it because it is undesirable if SIMD arrays are used.
5182 But if they aren't used, we need to emit the deferred
5183 initialization now. */
5184 else if (is_reference (var) && is_simd)
5185 handle_simd_reference (clause_loc, new_vard, ilist);
5186 x = lang_hooks.decls.omp_clause_default_ctor
5187 (c, unshare_expr (new_var),
5188 build_outer_var_ref (var, ctx));
5189 if (x)
5190 gimplify_and_add (x, ilist);
5191 if (OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c))
5192 {
5193 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c);
5194 lower_omp (&tseq, ctx);
5195 gimple_seq_add_seq (ilist, tseq);
5196 }
5197 OMP_CLAUSE_REDUCTION_GIMPLE_INIT (c) = NULL;
5198 if (is_simd)
5199 {
5200 tseq = OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c);
5201 lower_omp (&tseq, ctx);
5202 gimple_seq_add_seq (dlist, tseq);
5203 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
5204 }
5205 DECL_HAS_VALUE_EXPR_P (placeholder) = 0;
5206 goto do_dtor;
5207 }
5208 else
5209 {
5210 x = omp_reduction_init (c, TREE_TYPE (new_var));
5211 gcc_assert (TREE_CODE (TREE_TYPE (new_var)) != ARRAY_TYPE);
5212 enum tree_code code = OMP_CLAUSE_REDUCTION_CODE (c);
5213
5214 /* reduction(-:var) sums up the partial results, so it
5215 acts identically to reduction(+:var). */
5216 if (code == MINUS_EXPR)
5217 code = PLUS_EXPR;
5218
5219 tree new_vard = new_var;
5220 if (is_simd && is_reference (var))
5221 {
5222 gcc_assert (TREE_CODE (new_var) == MEM_REF);
5223 new_vard = TREE_OPERAND (new_var, 0);
5224 gcc_assert (DECL_P (new_vard));
5225 }
5226 if (is_simd
5227 && lower_rec_simd_input_clauses (new_var, ctx, max_vf,
5228 idx, lane, ivar, lvar))
5229 {
5230 tree ref = build_outer_var_ref (var, ctx);
5231
5232 gimplify_assign (unshare_expr (ivar), x, &llist[0]);
5233
5234 x = build2 (code, TREE_TYPE (ref), ref, ivar);
5235 ref = build_outer_var_ref (var, ctx);
5236 gimplify_assign (ref, x, &llist[1]);
5237
5238 if (new_vard != new_var)
5239 {
5240 SET_DECL_VALUE_EXPR (new_vard,
5241 build_fold_addr_expr (lvar));
5242 DECL_HAS_VALUE_EXPR_P (new_vard) = 1;
5243 }
5244 }
5245 else
5246 {
5247 if (is_reference (var) && is_simd)
5248 handle_simd_reference (clause_loc, new_vard, ilist);
5249 gimplify_assign (new_var, x, ilist);
5250 if (is_simd)
5251 {
5252 tree ref = build_outer_var_ref (var, ctx);
5253
5254 x = build2 (code, TREE_TYPE (ref), ref, new_var);
5255 ref = build_outer_var_ref (var, ctx);
5256 gimplify_assign (ref, x, dlist);
5257 }
5258 }
5259 }
5260 break;
5261
5262 default:
5263 gcc_unreachable ();
5264 }
5265 }
5266 }
5267
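/* If any "omp simd array" was created above, materialize the lane and
   index variables: call IFN_GOMP_SIMD_LANE to obtain the current lane,
   attach the simduid to the loop via an artificial _simduid_ clause,
   and wrap the per-lane initialization (llist[0]) and destruction
   (llist[1]) sequences in loops running IDX from 0 up to the value
   returned by IFN_GOMP_SIMD_VF.  */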
5268 if (lane)
5269 {
5270 tree uid = create_tmp_var (ptr_type_node, "simduid");
5271 /* We don't want uninitialized-use warnings on simduid; it is always
5272 uninitialized, since we use it only for its DECL_UID, not for its value. */
5273 TREE_NO_WARNING (uid) = 1;
5274 gimple *g
5275 = gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
5276 gimple_call_set_lhs (g, lane);
5277 gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
5278 gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
5279 c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
5280 OMP_CLAUSE__SIMDUID__DECL (c) = uid;
5281 OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
5282 gimple_omp_for_set_clauses (ctx->stmt, c);
5283 g = gimple_build_assign (lane, INTEGER_CST,
5284 build_int_cst (unsigned_type_node, 0));
5285 gimple_seq_add_stmt (ilist, g);
5286 for (int i = 0; i < 2; i++)
5287 if (llist[i])
5288 {
5289 tree vf = create_tmp_var (unsigned_type_node);
5290 g = gimple_build_call_internal (IFN_GOMP_SIMD_VF, 1, uid);
5291 gimple_call_set_lhs (g, vf);
5292 gimple_seq *seq = i == 0 ? ilist : dlist;
5293 gimple_seq_add_stmt (seq, g);
5294 tree t = build_int_cst (unsigned_type_node, 0);
5295 g = gimple_build_assign (idx, INTEGER_CST, t);
5296 gimple_seq_add_stmt (seq, g);
5297 tree body = create_artificial_label (UNKNOWN_LOCATION);
5298 tree header = create_artificial_label (UNKNOWN_LOCATION);
5299 tree end = create_artificial_label (UNKNOWN_LOCATION);
5300 gimple_seq_add_stmt (seq, gimple_build_goto (header));
5301 gimple_seq_add_stmt (seq, gimple_build_label (body));
5302 gimple_seq_add_seq (seq, llist[i]);
5303 t = build_int_cst (unsigned_type_node, 1);
5304 g = gimple_build_assign (idx, PLUS_EXPR, idx, t);
5305 gimple_seq_add_stmt (seq, g);
5306 gimple_seq_add_stmt (seq, gimple_build_label (header));
5307 g = gimple_build_cond (LT_EXPR, idx, vf, body, end);
5308 gimple_seq_add_stmt (seq, g);
5309 gimple_seq_add_stmt (seq, gimple_build_label (end));
5310 }
5311 }
5312
5313 /* The copyin sequence is not to be executed by the main thread, since
5314 that would result in self-copies. Perhaps not visible to scalars,
5315 but it certainly is to C++ operator=. */
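/* I.e. the code built below is roughly:
     if (omp_get_thread_num () != 0)
       <copyin_seq>;  */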
5316 if (copyin_seq)
5317 {
5318 x = build_call_expr (builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM),
5319 0);
5320 x = build2 (NE_EXPR, boolean_type_node, x,
5321 build_int_cst (TREE_TYPE (x), 0));
5322 x = build3 (COND_EXPR, void_type_node, x, copyin_seq, NULL);
5323 gimplify_and_add (x, ilist);
5324 }
5325
5326 /* If any copyin variable is passed by reference, we must ensure the
5327 master thread doesn't modify it before it is copied over in all
5328 threads. Similarly for variables in both firstprivate and
5329 lastprivate clauses we need to ensure the lastprivate copying
5330 happens after firstprivate copying in all threads. And similarly
5331 for UDRs if initializer expression refers to omp_orig. */
5332 if (copyin_by_ref || lastprivate_firstprivate || reduction_omp_orig_ref)
5333 {
5334 /* Don't add any barrier for #pragma omp simd or
5335 #pragma omp distribute. */
5336 if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR
5337 || gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_FOR)
5338 gimple_seq_add_stmt (ilist, build_omp_barrier (NULL_TREE));
5339 }
5340
5341 /* If max_vf is non-zero, then we can use only a vectorization factor
5342 up to the max_vf we chose. So stick it into the safelen clause. */
5343 if (max_vf)
5344 {
5345 tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt),
5346 OMP_CLAUSE_SAFELEN);
5347 if (c == NULL_TREE
5348 || (TREE_CODE (OMP_CLAUSE_SAFELEN_EXPR (c)) == INTEGER_CST
5349 && compare_tree_int (OMP_CLAUSE_SAFELEN_EXPR (c),
5350 max_vf) == 1))
5351 {
5352 c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
5353 OMP_CLAUSE_SAFELEN_EXPR (c) = build_int_cst (integer_type_node,
5354 max_vf);
5355 OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
5356 gimple_omp_for_set_clauses (ctx->stmt, c);
5357 }
5358 }
5359 }
5360
5361
5362 /* Generate code to implement the LASTPRIVATE clauses. This is used for
5363 both parallel and workshare constructs. PREDICATE may be NULL if it's
5364 always true. */
5365
5366 static void
5367 lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
5368 omp_context *ctx)
5369 {
5370 tree x, c, label = NULL, orig_clauses = clauses;
5371 bool par_clauses = false;
5372 tree simduid = NULL, lastlane = NULL;
5373
5374 /* Early exit if there are no lastprivate or linear clauses. */
5375 for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
5376 if (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_LASTPRIVATE
5377 || (OMP_CLAUSE_CODE (clauses) == OMP_CLAUSE_LINEAR
5378 && !OMP_CLAUSE_LINEAR_NO_COPYOUT (clauses)))
5379 break;
5380 if (clauses == NULL)
5381 {
5382 /* If this was a workshare clause, see if it had been combined
5383 with its parallel. In that case, look for the clauses on the
5384 parallel statement itself. */
5385 if (is_parallel_ctx (ctx))
5386 return;
5387
5388 ctx = ctx->outer;
5389 if (ctx == NULL || !is_parallel_ctx (ctx))
5390 return;
5391
5392 clauses = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
5393 OMP_CLAUSE_LASTPRIVATE);
5394 if (clauses == NULL)
5395 return;
5396 par_clauses = true;
5397 }
5398
5399 if (predicate)
5400 {
5401 gcond *stmt;
5402 tree label_true, arm1, arm2;
5403
5404 label = create_artificial_label (UNKNOWN_LOCATION);
5405 label_true = create_artificial_label (UNKNOWN_LOCATION);
5406 arm1 = TREE_OPERAND (predicate, 0);
5407 arm2 = TREE_OPERAND (predicate, 1);
5408 gimplify_expr (&arm1, stmt_list, NULL, is_gimple_val, fb_rvalue);
5409 gimplify_expr (&arm2, stmt_list, NULL, is_gimple_val, fb_rvalue);
5410 stmt = gimple_build_cond (TREE_CODE (predicate), arm1, arm2,
5411 label_true, label);
5412 gimple_seq_add_stmt (stmt_list, stmt);
5413 gimple_seq_add_stmt (stmt_list, gimple_build_label (label_true));
5414 }
5415
5416 if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
5417 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
5418 {
5419 simduid = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMDUID_);
5420 if (simduid)
5421 simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
5422 }
5423
5424 for (c = clauses; c ;)
5425 {
5426 tree var, new_var;
5427 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
5428
5429 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5430 || (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5431 && !OMP_CLAUSE_LINEAR_NO_COPYOUT (c)))
5432 {
5433 var = OMP_CLAUSE_DECL (c);
5434 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5435 && OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)
5436 && is_taskloop_ctx (ctx))
5437 {
5438 gcc_checking_assert (ctx->outer && is_task_ctx (ctx->outer));
5439 new_var = lookup_decl (var, ctx->outer);
5440 }
5441 else
5442 new_var = lookup_decl (var, ctx);
5443
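/* If the privatized variable was placed in an "omp simd array", ask
   IFN_GOMP_SIMD_LAST_LANE which lane executed the sequentially last
   iteration and read the value out of that element.  */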
5444 if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
5445 {
5446 tree val = DECL_VALUE_EXPR (new_var);
5447 if (TREE_CODE (val) == ARRAY_REF
5448 && VAR_P (TREE_OPERAND (val, 0))
5449 && lookup_attribute ("omp simd array",
5450 DECL_ATTRIBUTES (TREE_OPERAND (val,
5451 0))))
5452 {
5453 if (lastlane == NULL)
5454 {
5455 lastlane = create_tmp_var (unsigned_type_node);
5456 gcall *g
5457 = gimple_build_call_internal (IFN_GOMP_SIMD_LAST_LANE,
5458 2, simduid,
5459 TREE_OPERAND (val, 1));
5460 gimple_call_set_lhs (g, lastlane);
5461 gimple_seq_add_stmt (stmt_list, g);
5462 }
5463 new_var = build4 (ARRAY_REF, TREE_TYPE (val),
5464 TREE_OPERAND (val, 0), lastlane,
5465 NULL_TREE, NULL_TREE);
5466 }
5467 }
5468
5469 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5470 && OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c))
5471 {
5472 lower_omp (&OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c), ctx);
5473 gimple_seq_add_seq (stmt_list,
5474 OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
5475 OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c) = NULL;
5476 }
5477 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5478 && OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c))
5479 {
5480 lower_omp (&OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c), ctx);
5481 gimple_seq_add_seq (stmt_list,
5482 OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c));
5483 OMP_CLAUSE_LINEAR_GIMPLE_SEQ (c) = NULL;
5484 }
5485
5486 x = NULL_TREE;
5487 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
5488 && OMP_CLAUSE_LASTPRIVATE_TASKLOOP_IV (c))
5489 {
5490 gcc_checking_assert (is_taskloop_ctx (ctx));
5491 tree ovar = maybe_lookup_decl_in_outer_ctx (var,
5492 ctx->outer->outer);
5493 if (is_global_var (ovar))
5494 x = ovar;
5495 }
5496 if (!x)
5497 x = build_outer_var_ref (var, ctx, true);
5498 if (is_reference (var))
5499 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
5500 x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var);
5501 gimplify_and_add (x, stmt_list);
5502 }
5503 c = OMP_CLAUSE_CHAIN (c);
5504 if (c == NULL && !par_clauses)
5505 {
5506 /* If this was a workshare clause, see if it had been combined
5507 with its parallel. In that case, continue looking for the
5508 clauses also on the parallel statement itself. */
5509 if (is_parallel_ctx (ctx))
5510 break;
5511
5512 ctx = ctx->outer;
5513 if (ctx == NULL || !is_parallel_ctx (ctx))
5514 break;
5515
5516 c = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt),
5517 OMP_CLAUSE_LASTPRIVATE);
5518 par_clauses = true;
5519 }
5520 }
5521
5522 if (label)
5523 gimple_seq_add_stmt (stmt_list, gimple_build_label (label));
5524 }
5525
5526 /* Lower the OpenACC reductions of CLAUSES for compute axis LEVEL
5527 (which might be a placeholder). INNER is true if this is an inner
5528 axis of a multi-axis loop. FORK and JOIN are (optional) fork and
5529 join markers. Generate the before-loop forking sequence in
5530 FORK_SEQ and the after-loop joining sequence in JOIN_SEQ. The
5531 general form of these sequences is
5532
5533 GOACC_REDUCTION_SETUP
5534 GOACC_FORK
5535 GOACC_REDUCTION_INIT
5536 ...
5537 GOACC_REDUCTION_FINI
5538 GOACC_JOIN
5539 GOACC_REDUCTION_TEARDOWN. */
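/* As an illustrative sketch (not the literal GIMPLE this pass emits),
   a loop such as

     #pragma acc parallel loop reduction (+:sum)
     for (i = 0; i < n; i++)
       sum += a[i];

   is lowered so that the private copy of SUM is threaded through calls
   of the form

     sum = GOACC_REDUCTION (SETUP, &res, sum, level, +, offset);
     GOACC_FORK (...);
     sum = GOACC_REDUCTION (INIT, &res, sum, level, +, offset);
     ... loop body accumulating into SUM ...
     sum = GOACC_REDUCTION (FINI, &res, sum, level, +, offset);
     GOACC_JOIN (...);
     sum = GOACC_REDUCTION (TEARDOWN, &res, sum, level, +, offset);

   where RES stands for the receiver field mapped on an enclosing
   target construct, if one exists.  */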
5540
5541 static void
5542 lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
5543 gcall *fork, gcall *join, gimple_seq *fork_seq,
5544 gimple_seq *join_seq, omp_context *ctx)
5545 {
5546 gimple_seq before_fork = NULL;
5547 gimple_seq after_fork = NULL;
5548 gimple_seq before_join = NULL;
5549 gimple_seq after_join = NULL;
5550 tree init_code = NULL_TREE, fini_code = NULL_TREE,
5551 setup_code = NULL_TREE, teardown_code = NULL_TREE;
5552 unsigned offset = 0;
5553
5554 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5555 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
5556 {
5557 tree orig = OMP_CLAUSE_DECL (c);
5558 tree var = maybe_lookup_decl (orig, ctx);
5559 tree ref_to_res = NULL_TREE;
5560 tree incoming, outgoing;
5561
5562 enum tree_code rcode = OMP_CLAUSE_REDUCTION_CODE (c);
5563 if (rcode == MINUS_EXPR)
5564 rcode = PLUS_EXPR;
5565 else if (rcode == TRUTH_ANDIF_EXPR)
5566 rcode = BIT_AND_EXPR;
5567 else if (rcode == TRUTH_ORIF_EXPR)
5568 rcode = BIT_IOR_EXPR;
5569 tree op = build_int_cst (unsigned_type_node, rcode);
5570
5571 if (!var)
5572 var = orig;
5573 gcc_assert (!is_reference (var));
5574
5575 incoming = outgoing = var;
5576
5577 if (!inner)
5578 {
5579 /* See if an outer construct also reduces this variable. */
5580 omp_context *outer = ctx;
5581
5582 while (omp_context *probe = outer->outer)
5583 {
5584 enum gimple_code type = gimple_code (probe->stmt);
5585 tree cls;
5586
5587 switch (type)
5588 {
5589 case GIMPLE_OMP_FOR:
5590 cls = gimple_omp_for_clauses (probe->stmt);
5591 break;
5592
5593 case GIMPLE_OMP_TARGET:
5594 if (gimple_omp_target_kind (probe->stmt)
5595 != GF_OMP_TARGET_KIND_OACC_PARALLEL)
5596 goto do_lookup;
5597
5598 cls = gimple_omp_target_clauses (probe->stmt);
5599 break;
5600
5601 default:
5602 goto do_lookup;
5603 }
5604
5605 outer = probe;
5606 for (; cls; cls = OMP_CLAUSE_CHAIN (cls))
5607 if (OMP_CLAUSE_CODE (cls) == OMP_CLAUSE_REDUCTION
5608 && orig == OMP_CLAUSE_DECL (cls))
5609 goto has_outer_reduction;
5610 }
5611
5612 do_lookup:
5613 /* This is the outermost construct with this reduction;
5614 see if there's a mapping for it. */
5615 if (gimple_code (outer->stmt) == GIMPLE_OMP_TARGET
5616 && maybe_lookup_field (orig, outer))
5617 {
5618 ref_to_res = build_receiver_ref (orig, false, outer);
5619 if (is_reference (orig))
5620 ref_to_res = build_simple_mem_ref (ref_to_res);
5621
5622 outgoing = var;
5623 incoming = omp_reduction_init_op (loc, rcode, TREE_TYPE (var));
5624 }
5625 else
5626 incoming = outgoing = orig;
5627
5628 has_outer_reduction:;
5629 }
5630
5631 if (!ref_to_res)
5632 ref_to_res = integer_zero_node;
5633
5634 /* Determine position in reduction buffer, which may be used
5635 by target. */
5636 enum machine_mode mode = TYPE_MODE (TREE_TYPE (var));
5637 unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
5638 offset = (offset + align - 1) & ~(align - 1);
5639 tree off = build_int_cst (sizetype, offset);
5640 offset += GET_MODE_SIZE (mode);
5641
5642 if (!init_code)
5643 {
5644 init_code = build_int_cst (integer_type_node,
5645 IFN_GOACC_REDUCTION_INIT);
5646 fini_code = build_int_cst (integer_type_node,
5647 IFN_GOACC_REDUCTION_FINI);
5648 setup_code = build_int_cst (integer_type_node,
5649 IFN_GOACC_REDUCTION_SETUP);
5650 teardown_code = build_int_cst (integer_type_node,
5651 IFN_GOACC_REDUCTION_TEARDOWN);
5652 }
5653
5654 tree setup_call
5655 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5656 TREE_TYPE (var), 6, setup_code,
5657 unshare_expr (ref_to_res),
5658 incoming, level, op, off);
5659 tree init_call
5660 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5661 TREE_TYPE (var), 6, init_code,
5662 unshare_expr (ref_to_res),
5663 var, level, op, off);
5664 tree fini_call
5665 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5666 TREE_TYPE (var), 6, fini_code,
5667 unshare_expr (ref_to_res),
5668 var, level, op, off);
5669 tree teardown_call
5670 = build_call_expr_internal_loc (loc, IFN_GOACC_REDUCTION,
5671 TREE_TYPE (var), 6, teardown_code,
5672 ref_to_res, var, level, op, off);
5673
5674 gimplify_assign (var, setup_call, &before_fork);
5675 gimplify_assign (var, init_call, &after_fork);
5676 gimplify_assign (var, fini_call, &before_join);
5677 gimplify_assign (outgoing, teardown_call, &after_join);
5678 }
5679
5680 /* Now stitch things together. */
5681 gimple_seq_add_seq (fork_seq, before_fork);
5682 if (fork)
5683 gimple_seq_add_stmt (fork_seq, fork);
5684 gimple_seq_add_seq (fork_seq, after_fork);
5685
5686 gimple_seq_add_seq (join_seq, before_join);
5687 if (join)
5688 gimple_seq_add_stmt (join_seq, join);
5689 gimple_seq_add_seq (join_seq, after_join);
5690 }
5691
5692 /* Generate code to implement the REDUCTION clauses. */
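/* As a sketch of the intended effect (not the exact GIMPLE): with a
   single scalar clause such as reduction (+:sum), the merge of each
   thread's private copy SUM_PRIV is a single atomic update,

     #pragma omp atomic
     sum = sum + sum_priv;

   whereas with two or more clauses, or an array/UDR reduction, the
   partial results are merged under a lock,

     GOMP_atomic_start ();
     sum = sum + sum_priv;
     prod = prod * prod_priv;
     GOMP_atomic_end ();

   SUM_PRIV and PROD_PRIV name the privatized copies only for the sake
   of the example.  */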
5693
5694 static void
5695 lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx)
5696 {
5697 gimple_seq sub_seq = NULL;
5698 gimple *stmt;
5699 tree x, c;
5700 int count = 0;
5701
5702 /* OpenACC loop reductions are handled elsewhere. */
5703 if (is_gimple_omp_oacc (ctx->stmt))
5704 return;
5705
5706 /* SIMD reductions are handled in lower_rec_input_clauses. */
5707 if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
5708 && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD)
5709 return;
5710
5711 /* First see if there is exactly one reduction clause. Use OMP_ATOMIC
5712 update in that case, otherwise use a lock. */
5713 for (c = clauses; c && count < 2; c = OMP_CLAUSE_CHAIN (c))
5714 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
5715 {
5716 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c)
5717 || TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
5718 {
5719 /* Never use OMP_ATOMIC for array reductions or UDRs. */
5720 count = -1;
5721 break;
5722 }
5723 count++;
5724 }
5725
5726 if (count == 0)
5727 return;
5728
5729 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
5730 {
5731 tree var, ref, new_var, orig_var;
5732 enum tree_code code;
5733 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
5734
5735 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION)
5736 continue;
5737
5738 orig_var = var = OMP_CLAUSE_DECL (c);
5739 if (TREE_CODE (var) == MEM_REF)
5740 {
5741 var = TREE_OPERAND (var, 0);
5742 if (TREE_CODE (var) == POINTER_PLUS_EXPR)
5743 var = TREE_OPERAND (var, 0);
5744 if (TREE_CODE (var) == INDIRECT_REF
5745 || TREE_CODE (var) == ADDR_EXPR)
5746 var = TREE_OPERAND (var, 0);
5747 orig_var = var;
5748 if (is_variable_sized (var))
5749 {
5750 gcc_assert (DECL_HAS_VALUE_EXPR_P (var));
5751 var = DECL_VALUE_EXPR (var);
5752 gcc_assert (TREE_CODE (var) == INDIRECT_REF);
5753 var = TREE_OPERAND (var, 0);
5754 gcc_assert (DECL_P (var));
5755 }
5756 }
5757 new_var = lookup_decl (var, ctx);
5758 if (var == OMP_CLAUSE_DECL (c) && is_reference (var))
5759 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
5760 ref = build_outer_var_ref (var, ctx);
5761 code = OMP_CLAUSE_REDUCTION_CODE (c);
5762
5763 /* reduction(-:var) sums up the partial results, so it acts
5764 identically to reduction(+:var). */
5765 if (code == MINUS_EXPR)
5766 code = PLUS_EXPR;
5767
5768 if (count == 1)
5769 {
5770 tree addr = build_fold_addr_expr_loc (clause_loc, ref);
5771
5772 addr = save_expr (addr);
5773 ref = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (addr)), addr);
5774 x = fold_build2_loc (clause_loc, code, TREE_TYPE (ref), ref, new_var);
5775 x = build2 (OMP_ATOMIC, void_type_node, addr, x);
5776 gimplify_and_add (x, stmt_seqp);
5777 return;
5778 }
5779 else if (TREE_CODE (OMP_CLAUSE_DECL (c)) == MEM_REF)
5780 {
5781 tree d = OMP_CLAUSE_DECL (c);
5782 tree type = TREE_TYPE (d);
5783 tree v = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
5784 tree i = create_tmp_var (TREE_TYPE (v), NULL);
5785 tree ptype = build_pointer_type (TREE_TYPE (type));
5786 tree bias = TREE_OPERAND (d, 1);
5787 d = TREE_OPERAND (d, 0);
5788 if (TREE_CODE (d) == POINTER_PLUS_EXPR)
5789 {
5790 tree b = TREE_OPERAND (d, 1);
5791 b = maybe_lookup_decl (b, ctx);
5792 if (b == NULL)
5793 {
5794 b = TREE_OPERAND (d, 1);
5795 b = maybe_lookup_decl_in_outer_ctx (b, ctx);
5796 }
5797 if (integer_zerop (bias))
5798 bias = b;
5799 else
5800 {
5801 bias = fold_convert_loc (clause_loc, TREE_TYPE (b), bias);
5802 bias = fold_build2_loc (clause_loc, PLUS_EXPR,
5803 TREE_TYPE (b), b, bias);
5804 }
5805 d = TREE_OPERAND (d, 0);
5806 }
5807 /* For REF, build_outer_var_ref already performs this, so
5808 only NEW_VAR needs a dereference. */
5809 if (TREE_CODE (d) == INDIRECT_REF)
5810 {
5811 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
5812 gcc_assert (is_reference (var) && var == orig_var);
5813 }
5814 else if (TREE_CODE (d) == ADDR_EXPR)
5815 {
5816 if (orig_var == var)
5817 {
5818 new_var = build_fold_addr_expr (new_var);
5819 ref = build_fold_addr_expr (ref);
5820 }
5821 }
5822 else
5823 {
5824 gcc_assert (orig_var == var);
5825 if (is_reference (var))
5826 ref = build_fold_addr_expr (ref);
5827 }
5828 if (DECL_P (v))
5829 {
5830 tree t = maybe_lookup_decl (v, ctx);
5831 if (t)
5832 v = t;
5833 else
5834 v = maybe_lookup_decl_in_outer_ctx (v, ctx);
5835 gimplify_expr (&v, stmt_seqp, NULL, is_gimple_val, fb_rvalue);
5836 }
5837 if (!integer_zerop (bias))
5838 {
5839 bias = fold_convert_loc (clause_loc, sizetype, bias);
5840 new_var = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
5841 TREE_TYPE (new_var), new_var,
5842 unshare_expr (bias));
5843 ref = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
5844 TREE_TYPE (ref), ref, bias);
5845 }
5846 new_var = fold_convert_loc (clause_loc, ptype, new_var);
5847 ref = fold_convert_loc (clause_loc, ptype, ref);
5848 tree m = create_tmp_var (ptype, NULL);
5849 gimplify_assign (m, new_var, stmt_seqp);
5850 new_var = m;
5851 m = create_tmp_var (ptype, NULL);
5852 gimplify_assign (m, ref, stmt_seqp);
5853 ref = m;
5854 gimplify_assign (i, build_int_cst (TREE_TYPE (v), 0), stmt_seqp);
5855 tree body = create_artificial_label (UNKNOWN_LOCATION);
5856 tree end = create_artificial_label (UNKNOWN_LOCATION);
5857 gimple_seq_add_stmt (&sub_seq, gimple_build_label (body));
5858 tree priv = build_simple_mem_ref_loc (clause_loc, new_var);
5859 tree out = build_simple_mem_ref_loc (clause_loc, ref);
5860 if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
5861 {
5862 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
5863 tree decl_placeholder
5864 = OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c);
5865 SET_DECL_VALUE_EXPR (placeholder, out);
5866 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
5867 SET_DECL_VALUE_EXPR (decl_placeholder, priv);
5868 DECL_HAS_VALUE_EXPR_P (decl_placeholder) = 1;
5869 lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
5870 gimple_seq_add_seq (&sub_seq,
5871 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c));
5872 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
5873 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
5874 OMP_CLAUSE_REDUCTION_DECL_PLACEHOLDER (c) = NULL;
5875 }
5876 else
5877 {
5878 x = build2 (code, TREE_TYPE (out), out, priv);
5879 out = unshare_expr (out);
5880 gimplify_assign (out, x, &sub_seq);
5881 }
5882 gimple *g = gimple_build_assign (new_var, POINTER_PLUS_EXPR, new_var,
5883 TYPE_SIZE_UNIT (TREE_TYPE (type)));
5884 gimple_seq_add_stmt (&sub_seq, g);
5885 g = gimple_build_assign (ref, POINTER_PLUS_EXPR, ref,
5886 TYPE_SIZE_UNIT (TREE_TYPE (type)));
5887 gimple_seq_add_stmt (&sub_seq, g);
5888 g = gimple_build_assign (i, PLUS_EXPR, i,
5889 build_int_cst (TREE_TYPE (i), 1));
5890 gimple_seq_add_stmt (&sub_seq, g);
5891 g = gimple_build_cond (LE_EXPR, i, v, body, end);
5892 gimple_seq_add_stmt (&sub_seq, g);
5893 gimple_seq_add_stmt (&sub_seq, gimple_build_label (end));
5894 }
5895 else if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c))
5896 {
5897 tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c);
5898
5899 if (is_reference (var)
5900 && !useless_type_conversion_p (TREE_TYPE (placeholder),
5901 TREE_TYPE (ref)))
5902 ref = build_fold_addr_expr_loc (clause_loc, ref);
5903 SET_DECL_VALUE_EXPR (placeholder, ref);
5904 DECL_HAS_VALUE_EXPR_P (placeholder) = 1;
5905 lower_omp (&OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c), ctx);
5906 gimple_seq_add_seq (&sub_seq, OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c));
5907 OMP_CLAUSE_REDUCTION_GIMPLE_MERGE (c) = NULL;
5908 OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) = NULL;
5909 }
5910 else
5911 {
5912 x = build2 (code, TREE_TYPE (ref), ref, new_var);
5913 ref = build_outer_var_ref (var, ctx);
5914 gimplify_assign (ref, x, &sub_seq);
5915 }
5916 }
5917
5918 if (is_gimple_omp_oacc (ctx->stmt))
5919 return;
5920
5921 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START),
5922 0);
5923 gimple_seq_add_stmt (stmt_seqp, stmt);
5924
5925 gimple_seq_add_seq (stmt_seqp, sub_seq);
5926
5927 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END),
5928 0);
5929 gimple_seq_add_stmt (stmt_seqp, stmt);
5930 }
5931
5932
5933 /* Generate code to implement the COPYPRIVATE clauses. */
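/* Sketch for #pragma omp single copyprivate (x): the thread that
   executed the single region stores X (or &X when the field is passed
   by reference) into the broadcast record via SLIST,

     .omp_copy_o.x = x;

   and every other thread copies the value back out via RLIST,

     x = .omp_copy_i.x;

   The record and field names above are illustrative only; the runtime
   calls that broadcast the record's address are emitted elsewhere.  */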
5934
5935 static void
5936 lower_copyprivate_clauses (tree clauses, gimple_seq *slist, gimple_seq *rlist,
5937 omp_context *ctx)
5938 {
5939 tree c;
5940
5941 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
5942 {
5943 tree var, new_var, ref, x;
5944 bool by_ref;
5945 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
5946
5947 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYPRIVATE)
5948 continue;
5949
5950 var = OMP_CLAUSE_DECL (c);
5951 by_ref = use_pointer_for_field (var, NULL);
5952
5953 ref = build_sender_ref (var, ctx);
5954 x = new_var = lookup_decl_in_outer_ctx (var, ctx);
5955 if (by_ref)
5956 {
5957 x = build_fold_addr_expr_loc (clause_loc, new_var);
5958 x = fold_convert_loc (clause_loc, TREE_TYPE (ref), x);
5959 }
5960 gimplify_assign (ref, x, slist);
5961
5962 ref = build_receiver_ref (var, false, ctx);
5963 if (by_ref)
5964 {
5965 ref = fold_convert_loc (clause_loc,
5966 build_pointer_type (TREE_TYPE (new_var)),
5967 ref);
5968 ref = build_fold_indirect_ref_loc (clause_loc, ref);
5969 }
5970 if (is_reference (var))
5971 {
5972 ref = fold_convert_loc (clause_loc, TREE_TYPE (new_var), ref);
5973 ref = build_simple_mem_ref_loc (clause_loc, ref);
5974 new_var = build_simple_mem_ref_loc (clause_loc, new_var);
5975 }
5976 x = lang_hooks.decls.omp_clause_assign_op (c, new_var, ref);
5977 gimplify_and_add (x, rlist);
5978 }
5979 }
5980
5981
5982 /* Generate code to implement the FIRSTPRIVATE, COPYIN, LASTPRIVATE,
5983 and REDUCTION clauses from the sender (aka parent) side. */
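/* An illustrative sketch: for #pragma omp parallel firstprivate (a)
   lastprivate (b), the sender side emits into ILIST, before the
   parallel is launched,

     .omp_data_o.a = a;

   and into OLIST, after the region completes,

     b = .omp_data_o.b;

   with the address of the variable stored instead when the field is
   passed by reference.  */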
5984
5985 static void
5986 lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist,
5987 omp_context *ctx)
5988 {
5989 tree c, t;
5990 int ignored_looptemp = 0;
5991 bool is_taskloop = false;
5992
5993 /* For taskloop, ignore the first two _looptemp_ clauses; those are
5994 initialized by GOMP_taskloop. */
5995 if (is_task_ctx (ctx) && gimple_omp_task_taskloop_p (ctx->stmt))
5996 {
5997 ignored_looptemp = 2;
5998 is_taskloop = true;
5999 }
6000
6001 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
6002 {
6003 tree val, ref, x, var;
6004 bool by_ref, do_in = false, do_out = false;
6005 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
6006
6007 switch (OMP_CLAUSE_CODE (c))
6008 {
6009 case OMP_CLAUSE_PRIVATE:
6010 if (OMP_CLAUSE_PRIVATE_OUTER_REF (c))
6011 break;
6012 continue;
6013 case OMP_CLAUSE_FIRSTPRIVATE:
6014 case OMP_CLAUSE_COPYIN:
6015 case OMP_CLAUSE_LASTPRIVATE:
6016 case OMP_CLAUSE_REDUCTION:
6017 break;
6018 case OMP_CLAUSE_SHARED:
6019 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
6020 break;
6021 continue;
6022 case OMP_CLAUSE__LOOPTEMP_:
6023 if (ignored_looptemp)
6024 {
6025 ignored_looptemp--;
6026 continue;
6027 }
6028 break;
6029 default:
6030 continue;
6031 }
6032
6033 val = OMP_CLAUSE_DECL (c);
6034 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
6035 && TREE_CODE (val) == MEM_REF)
6036 {
6037 val = TREE_OPERAND (val, 0);
6038 if (TREE_CODE (val) == POINTER_PLUS_EXPR)
6039 val = TREE_OPERAND (val, 0);
6040 if (TREE_CODE (val) == INDIRECT_REF
6041 || TREE_CODE (val) == ADDR_EXPR)
6042 val = TREE_OPERAND (val, 0);
6043 if (is_variable_sized (val))
6044 continue;
6045 }
6046
6047 /* For OMP_CLAUSE_SHARED_FIRSTPRIVATE, look beyond the
6048 outer taskloop region. */
6049 omp_context *ctx_for_o = ctx;
6050 if (is_taskloop
6051 && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED
6052 && OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
6053 ctx_for_o = ctx->outer;
6054
6055 var = lookup_decl_in_outer_ctx (val, ctx_for_o);
6056
6057 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_COPYIN
6058 && is_global_var (var))
6059 continue;
6060
6061 t = omp_member_access_dummy_var (var);
6062 if (t)
6063 {
6064 var = DECL_VALUE_EXPR (var);
6065 tree o = maybe_lookup_decl_in_outer_ctx (t, ctx_for_o);
6066 if (o != t)
6067 var = unshare_and_remap (var, t, o);
6068 else
6069 var = unshare_expr (var);
6070 }
6071
6072 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SHARED)
6073 {
6074 /* Handle taskloop firstprivate/lastprivate, where the
6075 lastprivate on GIMPLE_OMP_TASK is represented as
6076 OMP_CLAUSE_SHARED_FIRSTPRIVATE. */
6077 tree f = lookup_sfield ((splay_tree_key) &DECL_UID (val), ctx);
6078 x = omp_build_component_ref (ctx->sender_decl, f);
6079 if (use_pointer_for_field (val, ctx))
6080 var = build_fold_addr_expr (var);
6081 gimplify_assign (x, var, ilist);
6082 DECL_ABSTRACT_ORIGIN (f) = NULL;
6083 continue;
6084 }
6085
6086 if ((OMP_CLAUSE_CODE (c) != OMP_CLAUSE_REDUCTION
6087 || val == OMP_CLAUSE_DECL (c))
6088 && is_variable_sized (val))
6089 continue;
6090 by_ref = use_pointer_for_field (val, NULL);
6091
6092 switch (OMP_CLAUSE_CODE (c))
6093 {
6094 case OMP_CLAUSE_PRIVATE:
6095 case OMP_CLAUSE_FIRSTPRIVATE:
6096 case OMP_CLAUSE_COPYIN:
6097 case OMP_CLAUSE__LOOPTEMP_:
6098 do_in = true;
6099 break;
6100
6101 case OMP_CLAUSE_LASTPRIVATE:
6102 if (by_ref || is_reference (val))
6103 {
6104 if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c))
6105 continue;
6106 do_in = true;
6107 }
6108 else
6109 {
6110 do_out = true;
6111 if (lang_hooks.decls.omp_private_outer_ref (val))
6112 do_in = true;
6113 }
6114 break;
6115
6116 case OMP_CLAUSE_REDUCTION:
6117 do_in = true;
6118 if (val == OMP_CLAUSE_DECL (c))
6119 do_out = !(by_ref || is_reference (val));
6120 else
6121 by_ref = TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE;
6122 break;
6123
6124 default:
6125 gcc_unreachable ();
6126 }
6127
6128 if (do_in)
6129 {
6130 ref = build_sender_ref (val, ctx);
6131 x = by_ref ? build_fold_addr_expr_loc (clause_loc, var) : var;
6132 gimplify_assign (ref, x, ilist);
6133 if (is_task_ctx (ctx))
6134 DECL_ABSTRACT_ORIGIN (TREE_OPERAND (ref, 1)) = NULL;
6135 }
6136
6137 if (do_out)
6138 {
6139 ref = build_sender_ref (val, ctx);
6140 gimplify_assign (var, ref, olist);
6141 }
6142 }
6143 }
6144
6145 /* Generate code to implement SHARED from the sender (aka parent)
6146 side. This is trickier, since GIMPLE_OMP_PARALLEL_CLAUSES doesn't
6147 list things that got automatically shared. */
6148
6149 static void
6150 lower_send_shared_vars (gimple_seq *ilist, gimple_seq *olist, omp_context *ctx)
6151 {
6152 tree var, ovar, nvar, t, f, x, record_type;
6153
6154 if (ctx->record_type == NULL)
6155 return;
6156
6157 record_type = ctx->srecord_type ? ctx->srecord_type : ctx->record_type;
6158 for (f = TYPE_FIELDS (record_type); f ; f = DECL_CHAIN (f))
6159 {
6160 ovar = DECL_ABSTRACT_ORIGIN (f);
6161 if (!ovar || TREE_CODE (ovar) == FIELD_DECL)
6162 continue;
6163
6164 nvar = maybe_lookup_decl (ovar, ctx);
6165 if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar))
6166 continue;
6167
6168 /* If CTX is a nested parallel directive, find the immediately
6169 enclosing parallel or workshare construct that contains a
6170 mapping for OVAR. */
6171 var = lookup_decl_in_outer_ctx (ovar, ctx);
6172
6173 t = omp_member_access_dummy_var (var);
6174 if (t)
6175 {
6176 var = DECL_VALUE_EXPR (var);
6177 tree o = maybe_lookup_decl_in_outer_ctx (t, ctx);
6178 if (o != t)
6179 var = unshare_and_remap (var, t, o);
6180 else
6181 var = unshare_expr (var);
6182 }
6183
6184 if (use_pointer_for_field (ovar, ctx))
6185 {
6186 x = build_sender_ref (ovar, ctx);
6187 var = build_fold_addr_expr (var);
6188 gimplify_assign (x, var, ilist);
6189 }
6190 else
6191 {
6192 x = build_sender_ref (ovar, ctx);
6193 gimplify_assign (x, var, ilist);
6194
6195 if (!TREE_READONLY (var)
6196 /* We don't need to receive a new reference to a result
6197 or parm decl. In fact we may not store to it as we will
6198 invalidate any pending RSO and generate wrong gimple
6199 during inlining. */
6200 && !((TREE_CODE (var) == RESULT_DECL
6201 || TREE_CODE (var) == PARM_DECL)
6202 && DECL_BY_REFERENCE (var)))
6203 {
6204 x = build_sender_ref (ovar, ctx);
6205 gimplify_assign (var, x, olist);
6206 }
6207 }
6208 }
6209 }
6210
6211 /* Emit an OpenACC head marker call, encapsulating the partitioning and
6212 other information that must be processed by the target compiler.
6213 Return the maximum number of dimensions the associated loop might
6214 be partitioned over. */
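/* For instance (a sketch), #pragma acc loop gang worker in a parallel
   region is marked roughly as

     ddvar = IFN_UNIQUE (OACC_HEAD_MARK, ddvar, 2,
                         OLF_DIM_GANG | OLF_DIM_WORKER | OLF_INDEPENDENT);

   i.e. two partitioning levels plus a tag encoding the requested
   dimensions; an explicit gang (static:N) argument would be appended
   as a further operand.  */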
6215
6216 static unsigned
6217 lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
6218 gimple_seq *seq, omp_context *ctx)
6219 {
6220 unsigned levels = 0;
6221 unsigned tag = 0;
6222 tree gang_static = NULL_TREE;
6223 auto_vec<tree, 5> args;
6224
6225 args.quick_push (build_int_cst
6226 (integer_type_node, IFN_UNIQUE_OACC_HEAD_MARK));
6227 args.quick_push (ddvar);
6228 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
6229 {
6230 switch (OMP_CLAUSE_CODE (c))
6231 {
6232 case OMP_CLAUSE_GANG:
6233 tag |= OLF_DIM_GANG;
6234 gang_static = OMP_CLAUSE_GANG_STATIC_EXPR (c);
6235 /* static:* is represented by -1, and we can ignore it, as
6236 scheduling is always static. */
6237 if (gang_static && integer_minus_onep (gang_static))
6238 gang_static = NULL_TREE;
6239 levels++;
6240 break;
6241
6242 case OMP_CLAUSE_WORKER:
6243 tag |= OLF_DIM_WORKER;
6244 levels++;
6245 break;
6246
6247 case OMP_CLAUSE_VECTOR:
6248 tag |= OLF_DIM_VECTOR;
6249 levels++;
6250 break;
6251
6252 case OMP_CLAUSE_SEQ:
6253 tag |= OLF_SEQ;
6254 break;
6255
6256 case OMP_CLAUSE_AUTO:
6257 tag |= OLF_AUTO;
6258 break;
6259
6260 case OMP_CLAUSE_INDEPENDENT:
6261 tag |= OLF_INDEPENDENT;
6262 break;
6263
6264 default:
6265 continue;
6266 }
6267 }
6268
6269 if (gang_static)
6270 {
6271 if (DECL_P (gang_static))
6272 gang_static = build_outer_var_ref (gang_static, ctx);
6273 tag |= OLF_GANG_STATIC;
6274 }
6275
6276 /* In a parallel region, loops are implicitly INDEPENDENT. */
6277 omp_context *tgt = enclosing_target_ctx (ctx);
6278 if (!tgt || is_oacc_parallel (tgt))
6279 tag |= OLF_INDEPENDENT;
6280
6281 /* A loop lacking SEQ, GANG, WORKER and/or VECTOR is implicitly AUTO. */
6282 if (!(tag & (((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE)
6283 | OLF_SEQ)))
6284 tag |= OLF_AUTO;
6285
6286 /* Ensure at least one level. */
6287 if (!levels)
6288 levels++;
6289
6290 args.quick_push (build_int_cst (integer_type_node, levels));
6291 args.quick_push (build_int_cst (integer_type_node, tag));
6292 if (gang_static)
6293 args.quick_push (gang_static);
6294
6295 gcall *call = gimple_build_call_internal_vec (IFN_UNIQUE, args);
6296 gimple_set_location (call, loc);
6297 gimple_set_lhs (call, ddvar);
6298 gimple_seq_add_stmt (seq, call);
6299
6300 return levels;
6301 }
6302
6303 /* Emit an OpenACC loop head or tail marker to SEQ. TOFOLLOW, if non-NULL,
6304 is the partitioning level of the enclosed region. */
6305
6306 static void
6307 lower_oacc_loop_marker (location_t loc, tree ddvar, bool head,
6308 tree tofollow, gimple_seq *seq)
6309 {
6310 int marker_kind = (head ? IFN_UNIQUE_OACC_HEAD_MARK
6311 : IFN_UNIQUE_OACC_TAIL_MARK);
6312 tree marker = build_int_cst (integer_type_node, marker_kind);
6313 int nargs = 2 + (tofollow != NULL_TREE);
6314 gcall *call = gimple_build_call_internal (IFN_UNIQUE, nargs,
6315 marker, ddvar, tofollow);
6316 gimple_set_location (call, loc);
6317 gimple_set_lhs (call, ddvar);
6318 gimple_seq_add_stmt (seq, call);
6319 }
6320
6321 /* Generate the before and after OpenACC loop sequences. CLAUSES are
6322 the loop clauses, from which we extract reductions. Initialize
6323 HEAD and TAIL. */
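/* Schematically (one block per partitioning level), HEAD ends up as

     OACC_HEAD_MARK (levels, tag)
     reduction setup / GOACC_FORK / reduction init   <- outermost level
     reduction setup / GOACC_FORK / reduction init   <- inner level(s)
     OACC_HEAD_MARK                                  <- end of sequence

   and TAIL mirrors it in reverse with fini / GOACC_JOIN / teardown
   blocks, so the innermost level is forked last and joined first.  */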
6324
6325 static void
6326 lower_oacc_head_tail (location_t loc, tree clauses,
6327 gimple_seq *head, gimple_seq *tail, omp_context *ctx)
6328 {
6329 bool inner = false;
6330 tree ddvar = create_tmp_var (integer_type_node, ".data_dep");
6331 gimple_seq_add_stmt (head, gimple_build_assign (ddvar, integer_zero_node));
6332
6333 unsigned count = lower_oacc_head_mark (loc, ddvar, clauses, head, ctx);
6334 if (!count)
6335 lower_oacc_loop_marker (loc, ddvar, false, integer_zero_node, tail);
6336
6337 tree fork_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
6338 tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
6339
6340 for (unsigned done = 1; count; count--, done++)
6341 {
6342 gimple_seq fork_seq = NULL;
6343 gimple_seq join_seq = NULL;
6344
6345 tree place = build_int_cst (integer_type_node, -1);
6346 gcall *fork = gimple_build_call_internal (IFN_UNIQUE, 3,
6347 fork_kind, ddvar, place);
6348 gimple_set_location (fork, loc);
6349 gimple_set_lhs (fork, ddvar);
6350
6351 gcall *join = gimple_build_call_internal (IFN_UNIQUE, 3,
6352 join_kind, ddvar, place);
6353 gimple_set_location (join, loc);
6354 gimple_set_lhs (join, ddvar);
6355
6356 /* Mark the beginning of this level sequence. */
6357 if (inner)
6358 lower_oacc_loop_marker (loc, ddvar, true,
6359 build_int_cst (integer_type_node, count),
6360 &fork_seq);
6361 lower_oacc_loop_marker (loc, ddvar, false,
6362 build_int_cst (integer_type_node, done),
6363 &join_seq);
6364
6365 lower_oacc_reductions (loc, clauses, place, inner,
6366 fork, join, &fork_seq, &join_seq, ctx);
6367
6368 /* Append this level to head. */
6369 gimple_seq_add_seq (head, fork_seq);
6370 /* Prepend it to tail. */
6371 gimple_seq_add_seq (&join_seq, *tail);
6372 *tail = join_seq;
6373
6374 inner = true;
6375 }
6376
6377 /* Mark the end of the sequence. */
6378 lower_oacc_loop_marker (loc, ddvar, true, NULL_TREE, head);
6379 lower_oacc_loop_marker (loc, ddvar, false, NULL_TREE, tail);
6380 }
6381
6382 /* A convenience function to build an empty GIMPLE_COND with just the
6383 condition. */
6384
6385 static gcond *
6386 gimple_build_cond_empty (tree cond)
6387 {
6388 enum tree_code pred_code;
6389 tree lhs, rhs;
6390
6391 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
6392 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
6393 }
6394
6395 /* Return true if a parallel REGION is within a declare target function or
6396 within a target region and is not a part of a gridified target. */
6397
6398 static bool
6399 parallel_needs_hsa_kernel_p (struct omp_region *region)
6400 {
6401 bool indirect = false;
6402 for (region = region->outer; region; region = region->outer)
6403 {
6404 if (region->type == GIMPLE_OMP_PARALLEL)
6405 indirect = true;
6406 else if (region->type == GIMPLE_OMP_TARGET)
6407 {
6408 gomp_target *tgt_stmt
6409 = as_a <gomp_target *> (last_stmt (region->entry));
6410
6411 if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
6412 OMP_CLAUSE__GRIDDIM_))
6413 return indirect;
6414 else
6415 return true;
6416 }
6417 }
6418
6419 if (lookup_attribute ("omp declare target",
6420 DECL_ATTRIBUTES (current_function_decl)))
6421 return true;
6422
6423 return false;
6424 }
6425
6426 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
6427 bool = false);
6428
6429 /* Build the function call to GOMP_parallel (or one of its workshare
6430 variants) to actually generate the parallel operation. REGION is the
6431 parallel region being expanded. BB is the block where the code is to
6432 be inserted. WS_ARGS is set if this is a call for a combined
6433 parallel+workshare construct; it contains the list of additional
6434 arguments needed by the workshare construct. */
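/* A sketch of the call emitted for #pragma omp parallel num_threads (4):

     GOMP_parallel (child_fn, &.omp_data_o, 4, flags);

   For a combined parallel loop or sections construct the corresponding
   GOMP_parallel_* entry point is used and the workshare arguments are
   spliced in before the flags; an IF clause turns the thread count into
   either (cond != 0) or (cond ? val : 1u).  */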
6435
6436 static void
6437 expand_parallel_call (struct omp_region *region, basic_block bb,
6438 gomp_parallel *entry_stmt,
6439 vec<tree, va_gc> *ws_args)
6440 {
6441 tree t, t1, t2, val, cond, c, clauses, flags;
6442 gimple_stmt_iterator gsi;
6443 gimple *stmt;
6444 enum built_in_function start_ix;
6445 int start_ix2;
6446 location_t clause_loc;
6447 vec<tree, va_gc> *args;
6448
6449 clauses = gimple_omp_parallel_clauses (entry_stmt);
6450
6451 /* Determine what flavor of GOMP_parallel we will be
6452 emitting. */
6453 start_ix = BUILT_IN_GOMP_PARALLEL;
6454 if (is_combined_parallel (region))
6455 {
6456 switch (region->inner->type)
6457 {
6458 case GIMPLE_OMP_FOR:
6459 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
6460 switch (region->inner->sched_kind)
6461 {
6462 case OMP_CLAUSE_SCHEDULE_RUNTIME:
6463 start_ix2 = 3;
6464 break;
6465 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
6466 case OMP_CLAUSE_SCHEDULE_GUIDED:
6467 if (region->inner->sched_modifiers
6468 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
6469 {
6470 start_ix2 = 3 + region->inner->sched_kind;
6471 break;
6472 }
6473 /* FALLTHRU */
6474 default:
6475 start_ix2 = region->inner->sched_kind;
6476 break;
6477 }
6478 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
6479 start_ix = (enum built_in_function) start_ix2;
6480 break;
6481 case GIMPLE_OMP_SECTIONS:
6482 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
6483 break;
6484 default:
6485 gcc_unreachable ();
6486 }
6487 }
6488
6489 /* By default, the value of NUM_THREADS is zero (selected at run time)
6490 and there is no conditional. */
6491 cond = NULL_TREE;
6492 val = build_int_cst (unsigned_type_node, 0);
6493 flags = build_int_cst (unsigned_type_node, 0);
6494
6495 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
6496 if (c)
6497 cond = OMP_CLAUSE_IF_EXPR (c);
6498
6499 c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS);
6500 if (c)
6501 {
6502 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
6503 clause_loc = OMP_CLAUSE_LOCATION (c);
6504 }
6505 else
6506 clause_loc = gimple_location (entry_stmt);
6507
6508 c = find_omp_clause (clauses, OMP_CLAUSE_PROC_BIND);
6509 if (c)
6510 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
6511
6512 /* Ensure 'val' is of the correct type. */
6513 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
6514
6515 /* If we found the clause 'if (cond)', build either
6516 (cond != 0) or (cond ? val : 1u). */
6517 if (cond)
6518 {
6519 cond = gimple_boolify (cond);
6520
6521 if (integer_zerop (val))
6522 val = fold_build2_loc (clause_loc,
6523 EQ_EXPR, unsigned_type_node, cond,
6524 build_int_cst (TREE_TYPE (cond), 0));
6525 else
6526 {
6527 basic_block cond_bb, then_bb, else_bb;
6528 edge e, e_then, e_else;
6529 tree tmp_then, tmp_else, tmp_join, tmp_var;
6530
6531 tmp_var = create_tmp_var (TREE_TYPE (val));
6532 if (gimple_in_ssa_p (cfun))
6533 {
6534 tmp_then = make_ssa_name (tmp_var);
6535 tmp_else = make_ssa_name (tmp_var);
6536 tmp_join = make_ssa_name (tmp_var);
6537 }
6538 else
6539 {
6540 tmp_then = tmp_var;
6541 tmp_else = tmp_var;
6542 tmp_join = tmp_var;
6543 }
6544
6545 e = split_block_after_labels (bb);
6546 cond_bb = e->src;
6547 bb = e->dest;
6548 remove_edge (e);
6549
6550 then_bb = create_empty_bb (cond_bb);
6551 else_bb = create_empty_bb (then_bb);
6552 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
6553 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
6554
6555 stmt = gimple_build_cond_empty (cond);
6556 gsi = gsi_start_bb (cond_bb);
6557 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
6558
6559 gsi = gsi_start_bb (then_bb);
6560 expand_omp_build_assign (&gsi, tmp_then, val, true);
6561
6562 gsi = gsi_start_bb (else_bb);
6563 expand_omp_build_assign (&gsi, tmp_else,
6564 build_int_cst (unsigned_type_node, 1),
6565 true);
6566
6567 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
6568 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
6569 add_bb_to_loop (then_bb, cond_bb->loop_father);
6570 add_bb_to_loop (else_bb, cond_bb->loop_father);
6571 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
6572 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
6573
6574 if (gimple_in_ssa_p (cfun))
6575 {
6576 gphi *phi = create_phi_node (tmp_join, bb);
6577 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
6578 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
6579 }
6580
6581 val = tmp_join;
6582 }
6583
6584 gsi = gsi_start_bb (bb);
6585 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
6586 false, GSI_CONTINUE_LINKING);
6587 }
6588
6589 gsi = gsi_last_bb (bb);
6590 t = gimple_omp_parallel_data_arg (entry_stmt);
6591 if (t == NULL)
6592 t1 = null_pointer_node;
6593 else
6594 t1 = build_fold_addr_expr (t);
6595 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
6596 t2 = build_fold_addr_expr (child_fndecl);
6597
6598 vec_alloc (args, 4 + vec_safe_length (ws_args));
6599 args->quick_push (t2);
6600 args->quick_push (t1);
6601 args->quick_push (val);
6602 if (ws_args)
6603 args->splice (*ws_args);
6604 args->quick_push (flags);
6605
6606 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
6607 builtin_decl_explicit (start_ix), args);
6608
6609 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6610 false, GSI_CONTINUE_LINKING);
6611
6612 if (hsa_gen_requested_p ()
6613 && parallel_needs_hsa_kernel_p (region))
6614 {
6615 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
6616 hsa_register_kernel (child_cnode);
6617 }
6618 }
6619
6620 /* Insert into the basic block BB a call to the function named FUNC_NAME,
6621 built from the information in ENTRY_STMT. */
6622
6623 static void
6624 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
6625 vec <tree, va_gc> *ws_args)
6626 {
6627 tree t, t1, t2;
6628 gimple_stmt_iterator gsi;
6629 vec <tree, va_gc> *args;
6630
6631 gcc_assert (vec_safe_length (ws_args) == 2);
6632 tree func_name = (*ws_args)[0];
6633 tree grain = (*ws_args)[1];
6634
6635 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
6636 tree count = find_omp_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
6637 gcc_assert (count != NULL_TREE);
6638 count = OMP_CLAUSE_OPERAND (count, 0);
6639
6640 gsi = gsi_last_bb (bb);
6641 t = gimple_omp_parallel_data_arg (entry_stmt);
6642 if (t == NULL)
6643 t1 = null_pointer_node;
6644 else
6645 t1 = build_fold_addr_expr (t);
6646 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
6647
6648 vec_alloc (args, 4);
6649 args->quick_push (t2);
6650 args->quick_push (t1);
6651 args->quick_push (count);
6652 args->quick_push (grain);
6653 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
6654
6655 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
6656 GSI_CONTINUE_LINKING);
6657 }
6658
6659 /* Build the function call to GOMP_task (or GOMP_taskloop) to actually
6660 generate the task operation. BB is the block where the code is to be inserted. */
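/* Sketch of the resulting call for a plain #pragma omp task:

     GOMP_task (child_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
                cond, flags, depend, priority);

   For a taskloop, GOMP_taskloop (or GOMP_taskloop_ull) is called instead,
   passing the flags, the number of tasks or grainsize, the priority and
   the start, end and step of the iteration range.  */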
6661
6662 static void
6663 expand_task_call (struct omp_region *region, basic_block bb,
6664 gomp_task *entry_stmt)
6665 {
6666 tree t1, t2, t3;
6667 gimple_stmt_iterator gsi;
6668 location_t loc = gimple_location (entry_stmt);
6669
6670 tree clauses = gimple_omp_task_clauses (entry_stmt);
6671
6672 tree ifc = find_omp_clause (clauses, OMP_CLAUSE_IF);
6673 tree untied = find_omp_clause (clauses, OMP_CLAUSE_UNTIED);
6674 tree mergeable = find_omp_clause (clauses, OMP_CLAUSE_MERGEABLE);
6675 tree depend = find_omp_clause (clauses, OMP_CLAUSE_DEPEND);
6676 tree finalc = find_omp_clause (clauses, OMP_CLAUSE_FINAL);
6677 tree priority = find_omp_clause (clauses, OMP_CLAUSE_PRIORITY);
6678
6679 unsigned int iflags
6680 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
6681 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
6682 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
6683
6684 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
6685 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
6686 tree num_tasks = NULL_TREE;
6687 bool ull = false;
6688 if (taskloop_p)
6689 {
6690 gimple *g = last_stmt (region->outer->entry);
6691 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
6692 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
6693 struct omp_for_data fd;
6694 extract_omp_for_data (as_a <gomp_for *> (g), &fd, NULL);
6695 startvar = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
6696 endvar = find_omp_clause (OMP_CLAUSE_CHAIN (startvar),
6697 OMP_CLAUSE__LOOPTEMP_);
6698 startvar = OMP_CLAUSE_DECL (startvar);
6699 endvar = OMP_CLAUSE_DECL (endvar);
6700 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
6701 if (fd.loop.cond_code == LT_EXPR)
6702 iflags |= GOMP_TASK_FLAG_UP;
6703 tree tclauses = gimple_omp_for_clauses (g);
6704 num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
6705 if (num_tasks)
6706 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
6707 else
6708 {
6709 num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
6710 if (num_tasks)
6711 {
6712 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
6713 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
6714 }
6715 else
6716 num_tasks = integer_zero_node;
6717 }
6718 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
6719 if (ifc == NULL_TREE)
6720 iflags |= GOMP_TASK_FLAG_IF;
6721 if (find_omp_clause (tclauses, OMP_CLAUSE_NOGROUP))
6722 iflags |= GOMP_TASK_FLAG_NOGROUP;
6723 ull = fd.iter_type == long_long_unsigned_type_node;
6724 }
6725 else if (priority)
6726 iflags |= GOMP_TASK_FLAG_PRIORITY;
6727
6728 tree flags = build_int_cst (unsigned_type_node, iflags);
6729
6730 tree cond = boolean_true_node;
6731 if (ifc)
6732 {
6733 if (taskloop_p)
6734 {
6735 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
6736 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
6737 build_int_cst (unsigned_type_node,
6738 GOMP_TASK_FLAG_IF),
6739 build_int_cst (unsigned_type_node, 0));
6740 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
6741 flags, t);
6742 }
6743 else
6744 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
6745 }
6746
6747 if (finalc)
6748 {
6749 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
6750 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
6751 build_int_cst (unsigned_type_node,
6752 GOMP_TASK_FLAG_FINAL),
6753 build_int_cst (unsigned_type_node, 0));
6754 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
6755 }
6756 if (depend)
6757 depend = OMP_CLAUSE_DECL (depend);
6758 else
6759 depend = build_int_cst (ptr_type_node, 0);
6760 if (priority)
6761 priority = fold_convert (integer_type_node,
6762 OMP_CLAUSE_PRIORITY_EXPR (priority));
6763 else
6764 priority = integer_zero_node;
6765
6766 gsi = gsi_last_bb (bb);
6767 tree t = gimple_omp_task_data_arg (entry_stmt);
6768 if (t == NULL)
6769 t2 = null_pointer_node;
6770 else
6771 t2 = build_fold_addr_expr_loc (loc, t);
6772 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
6773 t = gimple_omp_task_copy_fn (entry_stmt);
6774 if (t == NULL)
6775 t3 = null_pointer_node;
6776 else
6777 t3 = build_fold_addr_expr_loc (loc, t);
6778
6779 if (taskloop_p)
6780 t = build_call_expr (ull
6781 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
6782 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
6783 11, t1, t2, t3,
6784 gimple_omp_task_arg_size (entry_stmt),
6785 gimple_omp_task_arg_align (entry_stmt), flags,
6786 num_tasks, priority, startvar, endvar, step);
6787 else
6788 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
6789 9, t1, t2, t3,
6790 gimple_omp_task_arg_size (entry_stmt),
6791 gimple_omp_task_arg_align (entry_stmt), cond, flags,
6792 depend, priority);
6793
6794 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6795 false, GSI_CONTINUE_LINKING);
6796 }
6797
6798
6799 /* If exceptions are enabled, wrap the statements in BODY in a MUST_NOT_THROW
6800 catch handler and return it. This prevents programs from violating the
6801 structured block semantics with throws. */
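/* Conceptually (a sketch), the wrapping is

     try { BODY }
     catch (...) { MUST_NOT_THROW; }

   where the MUST_NOT_THROW region invokes the language's
   eh_protect_cleanup_actions hook, or __builtin_trap when the hook is
   not provided.  */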
6802
6803 static gimple_seq
6804 maybe_catch_exception (gimple_seq body)
6805 {
6806 gimple *g;
6807 tree decl;
6808
6809 if (!flag_exceptions)
6810 return body;
6811
6812 if (lang_hooks.eh_protect_cleanup_actions != NULL)
6813 decl = lang_hooks.eh_protect_cleanup_actions ();
6814 else
6815 decl = builtin_decl_explicit (BUILT_IN_TRAP);
6816
6817 g = gimple_build_eh_must_not_throw (decl);
6818 g = gimple_build_try (body, gimple_seq_alloc_with_stmt (g),
6819 GIMPLE_TRY_CATCH);
6820
6821 return gimple_seq_alloc_with_stmt (g);
6822 }
6823
6824 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
6825
6826 static tree
6827 vec2chain (vec<tree, va_gc> *v)
6828 {
6829 tree chain = NULL_TREE, t;
6830 unsigned ix;
6831
6832 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
6833 {
6834 DECL_CHAIN (t) = chain;
6835 chain = t;
6836 }
6837
6838 return chain;
6839 }
6840
6841
6842 /* Remove barriers in REGION->EXIT's block. Note that this is only
6843 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
6844 is an implicit barrier, any barrier that a workshare inside the
6845 GIMPLE_OMP_PARALLEL left at the end of the region can now be
6846 removed. */
6847
6848 static void
6849 remove_exit_barrier (struct omp_region *region)
6850 {
6851 gimple_stmt_iterator gsi;
6852 basic_block exit_bb;
6853 edge_iterator ei;
6854 edge e;
6855 gimple *stmt;
6856 int any_addressable_vars = -1;
6857
6858 exit_bb = region->exit;
6859
6860 /* If the parallel region doesn't return, we don't have REGION->EXIT
6861 block at all. */
6862 if (! exit_bb)
6863 return;
6864
6865 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
6866 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
6867 statements that can appear in between are extremely limited -- no
6868 memory operations at all. Here, we allow nothing at all, so the
6869 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
6870 gsi = gsi_last_bb (exit_bb);
6871 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
6872 gsi_prev (&gsi);
6873 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
6874 return;
6875
6876 FOR_EACH_EDGE (e, ei, exit_bb->preds)
6877 {
6878 gsi = gsi_last_bb (e->src);
6879 if (gsi_end_p (gsi))
6880 continue;
6881 stmt = gsi_stmt (gsi);
6882 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
6883 && !gimple_omp_return_nowait_p (stmt))
6884 {
6885 /* OpenMP 3.0 tasks unfortunately prevent this optimization
6886 in many cases. If there could be tasks queued, the barrier
6887 might be needed to let the tasks run before some local
6888 variable of the parallel that the task uses as shared
6889 runs out of scope. The task can be spawned either
6890 from within the current function (this would be easy to check)
6891 or from some function it calls and gets passed an address
6892 of such a variable. */
6893 if (any_addressable_vars < 0)
6894 {
6895 gomp_parallel *parallel_stmt
6896 = as_a <gomp_parallel *> (last_stmt (region->entry));
6897 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
6898 tree local_decls, block, decl;
6899 unsigned ix;
6900
6901 any_addressable_vars = 0;
6902 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
6903 if (TREE_ADDRESSABLE (decl))
6904 {
6905 any_addressable_vars = 1;
6906 break;
6907 }
6908 for (block = gimple_block (stmt);
6909 !any_addressable_vars
6910 && block
6911 && TREE_CODE (block) == BLOCK;
6912 block = BLOCK_SUPERCONTEXT (block))
6913 {
6914 for (local_decls = BLOCK_VARS (block);
6915 local_decls;
6916 local_decls = DECL_CHAIN (local_decls))
6917 if (TREE_ADDRESSABLE (local_decls))
6918 {
6919 any_addressable_vars = 1;
6920 break;
6921 }
6922 if (block == gimple_block (parallel_stmt))
6923 break;
6924 }
6925 }
6926 if (!any_addressable_vars)
6927 gimple_omp_return_set_nowait (stmt);
6928 }
6929 }
6930 }
6931
6932 static void
6933 remove_exit_barriers (struct omp_region *region)
6934 {
6935 if (region->type == GIMPLE_OMP_PARALLEL)
6936 remove_exit_barrier (region);
6937
6938 if (region->inner)
6939 {
6940 region = region->inner;
6941 remove_exit_barriers (region);
6942 while (region->next)
6943 {
6944 region = region->next;
6945 remove_exit_barriers (region);
6946 }
6947 }
6948 }
6949
6950 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
6951 calls. These can't be declared as const functions, but
6952 within one parallel body they are constant, so they can be
6953 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
6954 which are declared const. Similarly for a task body, except
6955 that in an untied task omp_get_thread_num () can change at any task
6956 scheduling point. */
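/* E.g. inside an outlined parallel body (a sketch):

     n = omp_get_num_threads ();   becomes   n = __builtin_omp_get_num_threads ();
     i = omp_get_thread_num ();    becomes   i = __builtin_omp_get_thread_num ();

   which lets later passes treat the calls as constant within the body.  */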
6957
6958 static void
6959 optimize_omp_library_calls (gimple *entry_stmt)
6960 {
6961 basic_block bb;
6962 gimple_stmt_iterator gsi;
6963 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
6964 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
6965 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
6966 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
6967 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
6968 && find_omp_clause (gimple_omp_task_clauses (entry_stmt),
6969 OMP_CLAUSE_UNTIED) != NULL);
6970
6971 FOR_EACH_BB_FN (bb, cfun)
6972 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6973 {
6974 gimple *call = gsi_stmt (gsi);
6975 tree decl;
6976
6977 if (is_gimple_call (call)
6978 && (decl = gimple_call_fndecl (call))
6979 && DECL_EXTERNAL (decl)
6980 && TREE_PUBLIC (decl)
6981 && DECL_INITIAL (decl) == NULL)
6982 {
6983 tree built_in;
6984
6985 if (DECL_NAME (decl) == thr_num_id)
6986 {
6987 /* In #pragma omp task untied omp_get_thread_num () can change
6988 during the execution of the task region. */
6989 if (untied_task)
6990 continue;
6991 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
6992 }
6993 else if (DECL_NAME (decl) == num_thr_id)
6994 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
6995 else
6996 continue;
6997
6998 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
6999 || gimple_call_num_args (call) != 0)
7000 continue;
7001
7002 if (flag_exceptions && !TREE_NOTHROW (decl))
7003 continue;
7004
7005 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
7006 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
7007 TREE_TYPE (TREE_TYPE (built_in))))
7008 continue;
7009
7010 gimple_call_set_fndecl (call, built_in);
7011 }
7012 }
7013 }
7014
7015 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
7016 regimplified. */
7017
7018 static tree
7019 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
7020 {
7021 tree t = *tp;
7022
7023 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
7024 if (TREE_CODE (t) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (t))
7025 return t;
7026
7027 if (TREE_CODE (t) == ADDR_EXPR)
7028 recompute_tree_invariant_for_addr_expr (t);
7029
7030 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7031 return NULL_TREE;
7032 }
7033
7034 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
7035
7036 static void
7037 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
7038 bool after)
7039 {
7040 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
7041 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
7042 !after, after ? GSI_CONTINUE_LINKING
7043 : GSI_SAME_STMT);
7044 gimple *stmt = gimple_build_assign (to, from);
7045 if (after)
7046 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
7047 else
7048 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
7049 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
7050 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
7051 {
7052 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
7053 gimple_regimplify_operands (stmt, &gsi);
7054 }
7055 }
7056
7057 /* Expand the OpenMP parallel or task directive starting at REGION. */
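/* Roughly (a sketch, with illustrative names): the region body is
   outlined into CHILD_FN, which receives the address of the
   data-sharing record as its only argument,

     static void child_fn (struct .omp_data_s *.omp_data_i) { ... }

   the original statements are moved into it with move_sese_region_to_fn,
   and the original location is left with the GOMP_parallel / GOMP_task
   launch emitted by expand_parallel_call or expand_task_call.  */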
7058
7059 static void
7060 expand_omp_taskreg (struct omp_region *region)
7061 {
7062 basic_block entry_bb, exit_bb, new_bb;
7063 struct function *child_cfun;
7064 tree child_fn, block, t;
7065 gimple_stmt_iterator gsi;
7066 gimple *entry_stmt, *stmt;
7067 edge e;
7068 vec<tree, va_gc> *ws_args;
7069
7070 entry_stmt = last_stmt (region->entry);
7071 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
7072 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7073
7074 entry_bb = region->entry;
7075 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
7076 exit_bb = region->cont;
7077 else
7078 exit_bb = region->exit;
7079
7080 bool is_cilk_for
7081 = (flag_cilkplus
7082 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
7083 && find_omp_clause (gimple_omp_parallel_clauses (entry_stmt),
7084 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
7085
7086 if (is_cilk_for)
7087 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
7088 and the inner statement contains the name of the built-in function
7089 and grain. */
7090 ws_args = region->inner->ws_args;
7091 else if (is_combined_parallel (region))
7092 ws_args = region->ws_args;
7093 else
7094 ws_args = NULL;
7095
7096 if (child_cfun->cfg)
7097 {
7098 /* Due to inlining, it may happen that we have already outlined
7099 the region, in which case all we need to do is make the
7100 sub-graph unreachable and emit the parallel call. */
7101 edge entry_succ_e, exit_succ_e;
7102
7103 entry_succ_e = single_succ_edge (entry_bb);
7104
7105 gsi = gsi_last_bb (entry_bb);
7106 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
7107 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
7108 gsi_remove (&gsi, true);
7109
7110 new_bb = entry_bb;
7111 if (exit_bb)
7112 {
7113 exit_succ_e = single_succ_edge (exit_bb);
7114 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
7115 }
7116 remove_edge_and_dominated_blocks (entry_succ_e);
7117 }
7118 else
7119 {
7120 unsigned srcidx, dstidx, num;
7121
7122 /* If the parallel region needs data sent from the parent
7123 function, then the very first statement (except possible
7124 tree profile counter updates) of the parallel body
7125 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7126 &.OMP_DATA_O is passed as an argument to the child function,
7127 we need to replace it with the argument as seen by the child
7128 function.
7129
7130 In most cases, this will end up being the identity assignment
7131 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
7132 a function call that has been inlined, the original PARM_DECL
7133 .OMP_DATA_I may have been converted into a different local
7134 variable, in which case we need to keep the assignment. */
7135 if (gimple_omp_taskreg_data_arg (entry_stmt))
7136 {
7137 basic_block entry_succ_bb
7138 = single_succ_p (entry_bb) ? single_succ (entry_bb)
7139 : FALLTHRU_EDGE (entry_bb)->dest;
7140 tree arg;
7141 gimple *parcopy_stmt = NULL;
7142
7143 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7144 {
7145 gimple *stmt;
7146
7147 gcc_assert (!gsi_end_p (gsi));
7148 stmt = gsi_stmt (gsi);
7149 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7150 continue;
7151
7152 if (gimple_num_ops (stmt) == 2)
7153 {
7154 tree arg = gimple_assign_rhs1 (stmt);
7155
7156 /* We're ignoring the subcode because we're
7157 effectively doing a STRIP_NOPS. */
7158
7159 if (TREE_CODE (arg) == ADDR_EXPR
7160 && TREE_OPERAND (arg, 0)
7161 == gimple_omp_taskreg_data_arg (entry_stmt))
7162 {
7163 parcopy_stmt = stmt;
7164 break;
7165 }
7166 }
7167 }
7168
7169 gcc_assert (parcopy_stmt != NULL);
7170 arg = DECL_ARGUMENTS (child_fn);
7171
7172 if (!gimple_in_ssa_p (cfun))
7173 {
7174 if (gimple_assign_lhs (parcopy_stmt) == arg)
7175 gsi_remove (&gsi, true);
7176 else
7177 {
7178 /* ?? Is setting the subcode really necessary ?? */
7179 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
7180 gimple_assign_set_rhs1 (parcopy_stmt, arg);
7181 }
7182 }
7183 else
7184 {
7185 tree lhs = gimple_assign_lhs (parcopy_stmt);
7186 gcc_assert (SSA_NAME_VAR (lhs) == arg);
7187 /* We'd like to set the rhs to the default def in the child_fn,
7188 but it's too early to create ssa names in the child_fn.
7189 Instead, we set the rhs to the parm. In
7190 move_sese_region_to_fn, we introduce a default def for the
7191 parm, map the parm to its default def, and once we encounter
7192 this stmt, replace the parm with the default def. */
7193 gimple_assign_set_rhs1 (parcopy_stmt, arg);
7194 update_stmt (parcopy_stmt);
7195 }
7196 }
7197
7198 /* Declare local variables needed in CHILD_CFUN. */
7199 block = DECL_INITIAL (child_fn);
7200 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7201 /* The gimplifier could record temporaries in parallel/task block
7202 rather than in containing function's local_decls chain,
7203 which would mean cgraph missed finalizing them. Do it now. */
7204 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7205 if (TREE_CODE (t) == VAR_DECL
7206 && TREE_STATIC (t)
7207 && !DECL_EXTERNAL (t))
7208 varpool_node::finalize_decl (t);
7209 DECL_SAVED_TREE (child_fn) = NULL;
7210 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7211 gimple_set_body (child_fn, NULL);
7212 TREE_USED (block) = 1;
7213
7214 /* Reset DECL_CONTEXT on function arguments. */
7215 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7216 DECL_CONTEXT (t) = child_fn;
7217
7218 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
7219 so that it can be moved to the child function. */
7220 gsi = gsi_last_bb (entry_bb);
7221 stmt = gsi_stmt (gsi);
7222 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
7223 || gimple_code (stmt) == GIMPLE_OMP_TASK));
7224 e = split_block (entry_bb, stmt);
7225 gsi_remove (&gsi, true);
7226 entry_bb = e->dest;
7227 edge e2 = NULL;
7228 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
7229 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7230 else
7231 {
7232 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
7233 gcc_assert (e2->dest == region->exit);
7234 remove_edge (BRANCH_EDGE (entry_bb));
7235 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
7236 gsi = gsi_last_bb (region->exit);
7237 gcc_assert (!gsi_end_p (gsi)
7238 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7239 gsi_remove (&gsi, true);
7240 }
7241
7242 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
7243 if (exit_bb)
7244 {
7245 gsi = gsi_last_bb (exit_bb);
7246 gcc_assert (!gsi_end_p (gsi)
7247 && (gimple_code (gsi_stmt (gsi))
7248 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
7249 stmt = gimple_build_return (NULL);
7250 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7251 gsi_remove (&gsi, true);
7252 }
7253
7254 /* Move the parallel region into CHILD_CFUN. */
7255
7256 if (gimple_in_ssa_p (cfun))
7257 {
7258 init_tree_ssa (child_cfun);
7259 init_ssa_operands (child_cfun);
7260 child_cfun->gimple_df->in_ssa_p = true;
7261 block = NULL_TREE;
7262 }
7263 else
7264 block = gimple_block (entry_stmt);
7265
7266 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7267 if (exit_bb)
7268 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7269 if (e2)
7270 {
7271 basic_block dest_bb = e2->dest;
7272 if (!exit_bb)
7273 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
7274 remove_edge (e2);
7275 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
7276 }
7277 /* When the OMP expansion process cannot guarantee an up-to-date
7278 loop tree, arrange for the child function to fix up loops. */
7279 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7280 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7281
7282 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7283 num = vec_safe_length (child_cfun->local_decls);
7284 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7285 {
7286 t = (*child_cfun->local_decls)[srcidx];
7287 if (DECL_CONTEXT (t) == cfun->decl)
7288 continue;
7289 if (srcidx != dstidx)
7290 (*child_cfun->local_decls)[dstidx] = t;
7291 dstidx++;
7292 }
7293 if (dstidx != num)
7294 vec_safe_truncate (child_cfun->local_decls, dstidx);
7295
7296 /* Inform the callgraph about the new function. */
7297 child_cfun->curr_properties = cfun->curr_properties;
7298 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7299 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7300 cgraph_node *node = cgraph_node::get_create (child_fn);
7301 node->parallelized_function = 1;
7302 cgraph_node::add_new_function (child_fn, true);
7303
7304 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7305 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7306
7307 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7308 fixed in a following pass. */
7309 push_cfun (child_cfun);
7310 if (need_asm)
7311 assign_assembler_name_if_neeeded (child_fn);
7312
7313 if (optimize)
7314 optimize_omp_library_calls (entry_stmt);
7315 cgraph_edge::rebuild_edges ();
7316
7317 /* Some EH regions might become dead, see PR34608. If
7318 pass_cleanup_cfg isn't the first pass to happen with the
7319 new child, these dead EH edges might cause problems.
7320 Clean them up now. */
7321 if (flag_exceptions)
7322 {
7323 basic_block bb;
7324 bool changed = false;
7325
7326 FOR_EACH_BB_FN (bb, cfun)
7327 changed |= gimple_purge_dead_eh_edges (bb);
7328 if (changed)
7329 cleanup_tree_cfg ();
7330 }
7331 if (gimple_in_ssa_p (cfun))
7332 update_ssa (TODO_update_ssa);
7333 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7334 verify_loop_structure ();
7335 pop_cfun ();
7336
7337 if (dump_file && !gimple_in_ssa_p (cfun))
7338 {
7339 omp_any_child_fn_dumped = true;
7340 dump_function_header (dump_file, child_fn, dump_flags);
7341 dump_function_to_file (child_fn, dump_file, dump_flags);
7342 }
7343 }
7344
7345 /* Emit a library call to launch the children threads. */
7346 if (is_cilk_for)
7347 expand_cilk_for_call (new_bb,
7348 as_a <gomp_parallel *> (entry_stmt), ws_args);
7349 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
7350 expand_parallel_call (region, new_bb,
7351 as_a <gomp_parallel *> (entry_stmt), ws_args);
7352 else
7353 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
7354 if (gimple_in_ssa_p (cfun))
7355 update_ssa (TODO_update_ssa_only_virtuals);
7356 }
7357
7358 /* Information about members of an OpenACC collapsed loop nest. */
7359
7360 struct oacc_collapse
7361 {
7362 tree base; /* Base value. */
7363 tree iters; /* Number of steps. */
7364 tree step; /* Step size. */
7365 };
7366
7367 /* Helper for expand_oacc_for. Determine collapsed loop information.
7368 Fill in COUNTS array. Emit any initialization code before GSI.
7369 Return the calculated outer loop bound of BOUND_TYPE. */
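/* For illustration only (this example is not part of the sources): a
   collapsed OpenACC nest such as

     #pragma acc loop collapse(2)
     for (i = 0; i < 8; i++)
       for (j = 0; j < 10; j += 3)
	 ...

   is described by two omp_for_data_loop entries.  For the inner loop,
   b = 0, e = 10, s = 3 and dir = +1, so range = 10 and
   iters = (range - dir + s) / s = (10 - 1 + 3) / 3 = 4 (j = 0, 3, 6, 9);
   the outer loop contributes iters = 8, and the returned outer bound is
   8 * 4 = 32 combined iterations.  */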
7370
7371 static tree
7372 expand_oacc_collapse_init (const struct omp_for_data *fd,
7373 gimple_stmt_iterator *gsi,
7374 oacc_collapse *counts, tree bound_type)
7375 {
7376 tree total = build_int_cst (bound_type, 1);
7377 int ix;
7378
7379 gcc_assert (integer_onep (fd->loop.step));
7380 gcc_assert (integer_zerop (fd->loop.n1));
7381
7382 for (ix = 0; ix != fd->collapse; ix++)
7383 {
7384 const omp_for_data_loop *loop = &fd->loops[ix];
7385
7386 tree iter_type = TREE_TYPE (loop->v);
7387 tree diff_type = iter_type;
7388 tree plus_type = iter_type;
7389
7390 gcc_assert (loop->cond_code == fd->loop.cond_code);
7391
7392 if (POINTER_TYPE_P (iter_type))
7393 plus_type = sizetype;
7394 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7395 diff_type = signed_type_for (diff_type);
7396
7397 tree b = loop->n1;
7398 tree e = loop->n2;
7399 tree s = loop->step;
7400 bool up = loop->cond_code == LT_EXPR;
7401 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7402 bool negating;
7403 tree expr;
7404
7405 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
7406 true, GSI_SAME_STMT);
7407 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
7408 true, GSI_SAME_STMT);
7409
7410 /* Convert the step, avoiding possible unsigned->signed overflow. */
7411 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7412 if (negating)
7413 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7414 s = fold_convert (diff_type, s);
7415 if (negating)
7416 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7417 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
7418 true, GSI_SAME_STMT);
7419
7420 /* Determine the range, avoiding possible unsigned->signed overflow. */
7421 negating = !up && TYPE_UNSIGNED (iter_type);
7422 expr = fold_build2 (MINUS_EXPR, plus_type,
7423 fold_convert (plus_type, negating ? b : e),
7424 fold_convert (plus_type, negating ? e : b));
7425 expr = fold_convert (diff_type, expr);
7426 if (negating)
7427 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7428 tree range = force_gimple_operand_gsi
7429 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
7430
7431 /* Determine number of iterations. */
7432 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7433 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7434 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7435
7436 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
7437 true, GSI_SAME_STMT);
7438
7439 counts[ix].base = b;
7440 counts[ix].iters = iters;
7441 counts[ix].step = s;
7442
7443 total = fold_build2 (MULT_EXPR, bound_type, total,
7444 fold_convert (bound_type, iters));
7445 }
7446
7447 return total;
7448 }
7449
7450 /* Emit initializers for collapsed loop members. IVAR is the outer
7451 loop iteration variable, from which collapsed loop iteration values
7452 are calculated. COUNTS array has been initialized by
7453 expand_oacc_collapse_init. */
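/* A sketch of the decomposition (illustrative values, not taken from the
   sources): with the collapse(2) example above, counts[0].iters = 8 and
   counts[1].iters = 4, so for the flattened index IVAR = 13 we emit

     j = 0 + (13 % 4) * 3;        which yields j = 3
     i = 0 + (13 / 4) % 8;        which yields i = 3

   i.e. the innermost variable is recovered first via TRUNC_MOD_EXPR and
   IVAR is then divided down for the next outer loop.  */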
7454
7455 static void
7456 expand_oacc_collapse_vars (const struct omp_for_data *fd,
7457 gimple_stmt_iterator *gsi,
7458 const oacc_collapse *counts, tree ivar)
7459 {
7460 tree ivar_type = TREE_TYPE (ivar);
7461
7462 /* The most rapidly changing iteration variable is the innermost
7463 one. */
7464 for (int ix = fd->collapse; ix--;)
7465 {
7466 const omp_for_data_loop *loop = &fd->loops[ix];
7467 const oacc_collapse *collapse = &counts[ix];
7468 tree iter_type = TREE_TYPE (loop->v);
7469 tree diff_type = TREE_TYPE (collapse->step);
7470 tree plus_type = iter_type;
7471 enum tree_code plus_code = PLUS_EXPR;
7472 tree expr;
7473
7474 if (POINTER_TYPE_P (iter_type))
7475 {
7476 plus_code = POINTER_PLUS_EXPR;
7477 plus_type = sizetype;
7478 }
7479
7480 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
7481 fold_convert (ivar_type, collapse->iters));
7482 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
7483 collapse->step);
7484 expr = fold_build2 (plus_code, iter_type, collapse->base,
7485 fold_convert (plus_type, expr));
7486 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
7487 true, GSI_SAME_STMT);
7488 gassign *ass = gimple_build_assign (loop->v, expr);
7489 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
7490
7491 if (ix)
7492 {
7493 expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
7494 fold_convert (ivar_type, collapse->iters));
7495 ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
7496 true, GSI_SAME_STMT);
7497 }
7498 }
7499 }
7500
7501
7502 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
7503 of the combined collapse > 1 loop constructs, generate code like:
7504 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
7505 if (cond3 is <)
7506 adj = STEP3 - 1;
7507 else
7508 adj = STEP3 + 1;
7509 count3 = (adj + N32 - N31) / STEP3;
7510 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
7511 if (cond2 is <)
7512 adj = STEP2 - 1;
7513 else
7514 adj = STEP2 + 1;
7515 count2 = (adj + N22 - N21) / STEP2;
7516 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
7517 if (cond1 is <)
7518 adj = STEP1 - 1;
7519 else
7520 adj = STEP1 + 1;
7521 count1 = (adj + N12 - N11) / STEP1;
7522 count = count1 * count2 * count3;
7523 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
7524 count = 0;
7525 and set ZERO_ITER_BB to that bb. If this isn't the outermost
7526 of the combined loop constructs, just initialize COUNTS array
7527 from the _looptemp_ clauses. */
7528
7529 /* NOTE: It *could* be better to moosh all of the BBs together,
7530 creating one larger BB with all the computation and the unexpected
7531 jump at the end. I.e.
7532
7533 bool zero3, zero2, zero1, zero;
7534
7535 zero3 = N32 c3 N31;
7536 count3 = (N32 - N31) /[cl] STEP3;
7537 zero2 = N22 c2 N21;
7538 count2 = (N22 - N21) /[cl] STEP2;
7539 zero1 = N12 c1 N11;
7540 count1 = (N12 - N11) /[cl] STEP1;
7541 zero = zero3 || zero2 || zero1;
7542 count = count1 * count2 * count3;
7543 if (__builtin_expect(zero, false)) goto zero_iter_bb;
7544
7545 After all, we expect zero to be false, and thus we expect to have to
7546 evaluate all of the comparison expressions, so short-circuiting
7547 oughtn't be a win. Since the condition isn't protecting a
7548 denominator, we're not concerned about divide-by-zero, so we can
7549 fully evaluate count even if a numerator turned out to be wrong.
7550
7551 It seems like putting this all together would create much better
7552 scheduling opportunities, and less pressure on the chip's branch
7553 predictor. */
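/* As a concrete (illustrative) instance of the pseudocode above, for

     #pragma omp for collapse(2)
     for (i = 0; i < N; i++)
       for (j = M; j > 0; j--)

   we get adj = 0 in both cases, so count1 = (0 + N - 0) / 1 = N,
   count2 = (0 + 0 - M) / -1 = M and count = N * M, with a branch to
   ZERO_ITER_BB as soon as either bound test fails.  */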
7554
7555 static void
7556 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
7557 basic_block &entry_bb, tree *counts,
7558 basic_block &zero_iter1_bb, int &first_zero_iter1,
7559 basic_block &zero_iter2_bb, int &first_zero_iter2,
7560 basic_block &l2_dom_bb)
7561 {
7562 tree t, type = TREE_TYPE (fd->loop.v);
7563 edge e, ne;
7564 int i;
7565
7566 /* Collapsed loops need work for expansion into SSA form. */
7567 gcc_assert (!gimple_in_ssa_p (cfun));
7568
7569 if (gimple_omp_for_combined_into_p (fd->for_stmt)
7570 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7571 {
7572 gcc_assert (fd->ordered == 0);
7573 /* First two _looptemp_ clauses are for istart/iend, counts[0]
7574 isn't supposed to be handled, as the inner loop doesn't
7575 use it. */
7576 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
7577 OMP_CLAUSE__LOOPTEMP_);
7578 gcc_assert (innerc);
7579 for (i = 0; i < fd->collapse; i++)
7580 {
7581 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
7582 OMP_CLAUSE__LOOPTEMP_);
7583 gcc_assert (innerc);
7584 if (i)
7585 counts[i] = OMP_CLAUSE_DECL (innerc);
7586 else
7587 counts[0] = NULL_TREE;
7588 }
7589 return;
7590 }
7591
7592 for (i = fd->collapse; i < fd->ordered; i++)
7593 {
7594 tree itype = TREE_TYPE (fd->loops[i].v);
7595 counts[i] = NULL_TREE;
7596 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
7597 fold_convert (itype, fd->loops[i].n1),
7598 fold_convert (itype, fd->loops[i].n2));
7599 if (t && integer_zerop (t))
7600 {
7601 for (i = fd->collapse; i < fd->ordered; i++)
7602 counts[i] = build_int_cst (type, 0);
7603 break;
7604 }
7605 }
7606 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
7607 {
7608 tree itype = TREE_TYPE (fd->loops[i].v);
7609
7610 if (i >= fd->collapse && counts[i])
7611 continue;
7612 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
7613 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
7614 fold_convert (itype, fd->loops[i].n1),
7615 fold_convert (itype, fd->loops[i].n2)))
7616 == NULL_TREE || !integer_onep (t)))
7617 {
7618 gcond *cond_stmt;
7619 tree n1, n2;
7620 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
7621 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
7622 true, GSI_SAME_STMT);
7623 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
7624 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
7625 true, GSI_SAME_STMT);
7626 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
7627 NULL_TREE, NULL_TREE);
7628 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
7629 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
7630 expand_omp_regimplify_p, NULL, NULL)
7631 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
7632 expand_omp_regimplify_p, NULL, NULL))
7633 {
7634 *gsi = gsi_for_stmt (cond_stmt);
7635 gimple_regimplify_operands (cond_stmt, gsi);
7636 }
7637 e = split_block (entry_bb, cond_stmt);
7638 basic_block &zero_iter_bb
7639 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
7640 int &first_zero_iter
7641 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
7642 if (zero_iter_bb == NULL)
7643 {
7644 gassign *assign_stmt;
7645 first_zero_iter = i;
7646 zero_iter_bb = create_empty_bb (entry_bb);
7647 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
7648 *gsi = gsi_after_labels (zero_iter_bb);
7649 if (i < fd->collapse)
7650 assign_stmt = gimple_build_assign (fd->loop.n2,
7651 build_zero_cst (type));
7652 else
7653 {
7654 counts[i] = create_tmp_reg (type, ".count");
7655 assign_stmt
7656 = gimple_build_assign (counts[i], build_zero_cst (type));
7657 }
7658 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
7659 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
7660 entry_bb);
7661 }
7662 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
7663 ne->probability = REG_BR_PROB_BASE / 2000 - 1;
7664 e->flags = EDGE_TRUE_VALUE;
7665 e->probability = REG_BR_PROB_BASE - ne->probability;
7666 if (l2_dom_bb == NULL)
7667 l2_dom_bb = entry_bb;
7668 entry_bb = e->dest;
7669 *gsi = gsi_last_bb (entry_bb);
7670 }
7671
7672 if (POINTER_TYPE_P (itype))
7673 itype = signed_type_for (itype);
7674 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
7675 ? -1 : 1));
7676 t = fold_build2 (PLUS_EXPR, itype,
7677 fold_convert (itype, fd->loops[i].step), t);
7678 t = fold_build2 (PLUS_EXPR, itype, t,
7679 fold_convert (itype, fd->loops[i].n2));
7680 t = fold_build2 (MINUS_EXPR, itype, t,
7681 fold_convert (itype, fd->loops[i].n1));
7682 /* ?? We could probably use CEIL_DIV_EXPR instead of
7683 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
7684 generate the same code in the end because generically we
7685 don't know that the values involved must be negative for
7686 GT?? */
7687 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
7688 t = fold_build2 (TRUNC_DIV_EXPR, itype,
7689 fold_build1 (NEGATE_EXPR, itype, t),
7690 fold_build1 (NEGATE_EXPR, itype,
7691 fold_convert (itype,
7692 fd->loops[i].step)));
7693 else
7694 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
7695 fold_convert (itype, fd->loops[i].step));
7696 t = fold_convert (type, t);
7697 if (TREE_CODE (t) == INTEGER_CST)
7698 counts[i] = t;
7699 else
7700 {
7701 if (i < fd->collapse || i != first_zero_iter2)
7702 counts[i] = create_tmp_reg (type, ".count");
7703 expand_omp_build_assign (gsi, counts[i], t);
7704 }
7705 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
7706 {
7707 if (i == 0)
7708 t = counts[0];
7709 else
7710 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
7711 expand_omp_build_assign (gsi, fd->loop.n2, t);
7712 }
7713 }
7714 }
7715
7716
7717 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
7718 T = V;
7719 V3 = N31 + (T % count3) * STEP3;
7720 T = T / count3;
7721 V2 = N21 + (T % count2) * STEP2;
7722 T = T / count2;
7723 V1 = N11 + T * STEP1;
7724 if this loop doesn't have an inner loop construct combined with it.
7725 If it does have an inner loop construct combined with it and the
7726 iteration count isn't known constant, store values from counts array
7727 into its _looptemp_ temporaries instead. */
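/* A worked example (illustrative numbers only): with count3 = 4,
   count2 = 5 and T = V = 17 the sequence above computes

     V3 = N31 + (17 % 4) * STEP3;	then T = 17 / 4 = 4;
     V2 = N21 + (4 % 5) * STEP2;	then T = 4 / 5 = 0;
     V1 = N11 + 0 * STEP1;

   i.e. iteration 17 of the flattened space maps back to the indices
   (0, 4, 1) in units of (STEP1, STEP2, STEP3).  */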
7728
7729 static void
7730 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
7731 tree *counts, gimple *inner_stmt, tree startvar)
7732 {
7733 int i;
7734 if (gimple_omp_for_combined_p (fd->for_stmt))
7735 {
7736 /* If fd->loop.n2 is constant, then no propagation of the counts
7737 is needed, they are constant. */
7738 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
7739 return;
7740
7741 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
7742 ? gimple_omp_taskreg_clauses (inner_stmt)
7743 : gimple_omp_for_clauses (inner_stmt);
7744 /* First two _looptemp_ clauses are for istart/iend, counts[0]
7745 isn't supposed to be handled, as the inner loop doesn't
7746 use it. */
7747 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7748 gcc_assert (innerc);
7749 for (i = 0; i < fd->collapse; i++)
7750 {
7751 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
7752 OMP_CLAUSE__LOOPTEMP_);
7753 gcc_assert (innerc);
7754 if (i)
7755 {
7756 tree tem = OMP_CLAUSE_DECL (innerc);
7757 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
7758 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
7759 false, GSI_CONTINUE_LINKING);
7760 gassign *stmt = gimple_build_assign (tem, t);
7761 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
7762 }
7763 }
7764 return;
7765 }
7766
7767 tree type = TREE_TYPE (fd->loop.v);
7768 tree tem = create_tmp_reg (type, ".tem");
7769 gassign *stmt = gimple_build_assign (tem, startvar);
7770 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
7771
7772 for (i = fd->collapse - 1; i >= 0; i--)
7773 {
7774 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
7775 itype = vtype;
7776 if (POINTER_TYPE_P (vtype))
7777 itype = signed_type_for (vtype);
7778 if (i != 0)
7779 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
7780 else
7781 t = tem;
7782 t = fold_convert (itype, t);
7783 t = fold_build2 (MULT_EXPR, itype, t,
7784 fold_convert (itype, fd->loops[i].step));
7785 if (POINTER_TYPE_P (vtype))
7786 t = fold_build_pointer_plus (fd->loops[i].n1, t);
7787 else
7788 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
7789 t = force_gimple_operand_gsi (gsi, t,
7790 DECL_P (fd->loops[i].v)
7791 && TREE_ADDRESSABLE (fd->loops[i].v),
7792 NULL_TREE, false,
7793 GSI_CONTINUE_LINKING);
7794 stmt = gimple_build_assign (fd->loops[i].v, t);
7795 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
7796 if (i != 0)
7797 {
7798 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
7799 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
7800 false, GSI_CONTINUE_LINKING);
7801 stmt = gimple_build_assign (tem, t);
7802 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
7803 }
7804 }
7805 }
7806
7807
7808 /* Helper function for expand_omp_for_*. Generate code like:
7809 L10:
7810 V3 += STEP3;
7811 if (V3 cond3 N32) goto BODY_BB; else goto L11;
7812 L11:
7813 V3 = N31;
7814 V2 += STEP2;
7815 if (V2 cond2 N22) goto BODY_BB; else goto L12;
7816 L12:
7817 V2 = N21;
7818 V1 += STEP1;
7819 goto BODY_BB; */
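/* Informally, this is the "odometer" update for a collapsed nest: the
   innermost variable is stepped first, and whenever a variable wraps it
   is reset to its lower bound and the next outer variable is stepped.
   For a collapse(2) nest over i (outer) and j (inner) the blocks built
   below amount to

     L10: j += STEPj; if (j condj Nj2) goto BODY_BB; else goto L11;
     L11: j = Nj1; i += STEPi; goto BODY_BB;

   where Nj1/Nj2/STEPj stand for the inner loop's bounds and step (names
   used here only for illustration).  */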
7820
7821 static basic_block
7822 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
7823 basic_block body_bb)
7824 {
7825 basic_block last_bb, bb, collapse_bb = NULL;
7826 int i;
7827 gimple_stmt_iterator gsi;
7828 edge e;
7829 tree t;
7830 gimple *stmt;
7831
7832 last_bb = cont_bb;
7833 for (i = fd->collapse - 1; i >= 0; i--)
7834 {
7835 tree vtype = TREE_TYPE (fd->loops[i].v);
7836
7837 bb = create_empty_bb (last_bb);
7838 add_bb_to_loop (bb, last_bb->loop_father);
7839 gsi = gsi_start_bb (bb);
7840
7841 if (i < fd->collapse - 1)
7842 {
7843 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
7844 e->probability = REG_BR_PROB_BASE / 8;
7845
7846 t = fd->loops[i + 1].n1;
7847 t = force_gimple_operand_gsi (&gsi, t,
7848 DECL_P (fd->loops[i + 1].v)
7849 && TREE_ADDRESSABLE (fd->loops[i
7850 + 1].v),
7851 NULL_TREE, false,
7852 GSI_CONTINUE_LINKING);
7853 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
7854 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7855 }
7856 else
7857 collapse_bb = bb;
7858
7859 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
7860
7861 if (POINTER_TYPE_P (vtype))
7862 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
7863 else
7864 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
7865 t = force_gimple_operand_gsi (&gsi, t,
7866 DECL_P (fd->loops[i].v)
7867 && TREE_ADDRESSABLE (fd->loops[i].v),
7868 NULL_TREE, false, GSI_CONTINUE_LINKING);
7869 stmt = gimple_build_assign (fd->loops[i].v, t);
7870 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7871
7872 if (i > 0)
7873 {
7874 t = fd->loops[i].n2;
7875 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7876 false, GSI_CONTINUE_LINKING);
7877 tree v = fd->loops[i].v;
7878 if (DECL_P (v) && TREE_ADDRESSABLE (v))
7879 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
7880 false, GSI_CONTINUE_LINKING);
7881 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
7882 stmt = gimple_build_cond_empty (t);
7883 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7884 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
7885 e->probability = REG_BR_PROB_BASE * 7 / 8;
7886 }
7887 else
7888 make_edge (bb, body_bb, EDGE_FALLTHRU);
7889 last_bb = bb;
7890 }
7891
7892 return collapse_bb;
7893 }
7894
7895
7896 /* Expand #pragma omp ordered depend(source). */
7897
7898 static void
7899 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
7900 tree *counts, location_t loc)
7901 {
7902 enum built_in_function source_ix
7903 = fd->iter_type == long_integer_type_node
7904 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
7905 gimple *g
7906 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
7907 build_fold_addr_expr (counts[fd->ordered]));
7908 gimple_set_location (g, loc);
7909 gsi_insert_before (gsi, g, GSI_SAME_STMT);
7910 }
7911
7912 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
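/* Roughly (an illustrative sketch, not lifted from the testsuite), for

     #pragma omp for ordered(2)
     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
	 {
	   #pragma omp ordered depend(sink: i - 1, j)
	   ...
	   #pragma omp ordered depend(source)
	 }

   the sink clause is expanded into a guarded wait on the referenced
   logical iteration, approximately

     if (i - 1 >= 0)
       GOMP_doacross_wait (i - 1, j);

   (or the _ull_ variant), while the matching source clause posts the
   current iteration via expand_omp_ordered_source above.  */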
7913
7914 static void
7915 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
7916 tree *counts, tree c, location_t loc)
7917 {
7918 auto_vec<tree, 10> args;
7919 enum built_in_function sink_ix
7920 = fd->iter_type == long_integer_type_node
7921 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
7922 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
7923 int i;
7924 gimple_stmt_iterator gsi2 = *gsi;
7925 bool warned_step = false;
7926
7927 for (i = 0; i < fd->ordered; i++)
7928 {
7929 off = TREE_PURPOSE (deps);
7930 if (!integer_zerop (off))
7931 {
7932 gcc_assert (fd->loops[i].cond_code == LT_EXPR
7933 || fd->loops[i].cond_code == GT_EXPR);
7934 bool forward = fd->loops[i].cond_code == LT_EXPR;
7935 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
7936 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
7937 "lexically later iteration");
7938 break;
7939 }
7940 deps = TREE_CHAIN (deps);
7941 }
7942 /* If all offsets corresponding to the collapsed loops are zero,
7943 this depend clause can be ignored. FIXME: but there is still a
7944 flush needed. We need to emit one __sync_synchronize () for it
7945 though (perhaps conditionally)? Solve this together with the
7946 conservative dependence folding optimization.
7947 if (i >= fd->collapse)
7948 return; */
7949
7950 deps = OMP_CLAUSE_DECL (c);
7951 gsi_prev (&gsi2);
7952 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
7953 edge e2 = split_block_after_labels (e1->dest);
7954
7955 *gsi = gsi_after_labels (e1->dest);
7956 for (i = 0; i < fd->ordered; i++)
7957 {
7958 tree itype = TREE_TYPE (fd->loops[i].v);
7959 if (POINTER_TYPE_P (itype))
7960 itype = sizetype;
7961 if (i)
7962 deps = TREE_CHAIN (deps);
7963 off = TREE_PURPOSE (deps);
7964 tree s = fold_convert_loc (loc, itype, fd->loops[i].step);
7965
7966 if (integer_zerop (off))
7967 t = boolean_true_node;
7968 else
7969 {
7970 tree a;
7971 tree co = fold_convert_loc (loc, itype, off);
7972 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
7973 {
7974 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
7975 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
7976 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
7977 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
7978 co);
7979 }
7980 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
7981 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
7982 fd->loops[i].v, co);
7983 else
7984 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
7985 fd->loops[i].v, co);
7986 if (fd->loops[i].cond_code == LT_EXPR)
7987 {
7988 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
7989 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
7990 fd->loops[i].n1);
7991 else
7992 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
7993 fd->loops[i].n2);
7994 }
7995 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
7996 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
7997 fd->loops[i].n2);
7998 else
7999 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
8000 fd->loops[i].n1);
8001 }
8002 if (cond)
8003 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
8004 else
8005 cond = t;
8006
8007 off = fold_convert_loc (loc, itype, off);
8008
8009 if (fd->loops[i].cond_code == LT_EXPR
8010 ? !integer_onep (fd->loops[i].step)
8011 : !integer_minus_onep (fd->loops[i].step))
8012 {
8013 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
8014 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
8015 fold_build1_loc (loc, NEGATE_EXPR, itype,
8016 s));
8017 else
8018 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, s);
8019 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
8020 build_int_cst (itype, 0));
8021 if (integer_zerop (t) && !warned_step)
8022 {
8023 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
8024 "in the iteration space");
8025 warned_step = true;
8026 }
8027 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
8028 cond, t);
8029 }
8030
8031 if (i <= fd->collapse - 1 && fd->collapse > 1)
8032 t = fd->loop.v;
8033 else if (counts[i])
8034 t = counts[i];
8035 else
8036 {
8037 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
8038 fd->loops[i].v, fd->loops[i].n1);
8039 t = fold_convert_loc (loc, fd->iter_type, t);
8040 }
8041 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
8042 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
8043 fold_build1_loc (loc, NEGATE_EXPR, itype,
8044 s));
8045 else
8046 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
8047 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
8048 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
8049 off = fold_convert_loc (loc, fd->iter_type, off);
8050 if (i <= fd->collapse - 1 && fd->collapse > 1)
8051 {
8052 if (i)
8053 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
8054 off);
8055 if (i < fd->collapse - 1)
8056 {
8057 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
8058 counts[i]);
8059 continue;
8060 }
8061 }
8062 off = unshare_expr (off);
8063 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
8064 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
8065 true, GSI_SAME_STMT);
8066 args.safe_push (t);
8067 }
8068 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
8069 gimple_set_location (g, loc);
8070 gsi_insert_before (gsi, g, GSI_SAME_STMT);
8071
8072 *gsi = gsi_last_bb (e1->src);
8073 cond = unshare_expr (cond);
8074 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
8075 GSI_CONTINUE_LINKING);
8076 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
8077 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
8078 e3->probability = REG_BR_PROB_BASE / 8;
8079 e1->probability = REG_BR_PROB_BASE - e3->probability;
8080 e1->flags = EDGE_TRUE_VALUE;
8081 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
8082
8083 *gsi = gsi_after_labels (e2->dest);
8084 }
8085
8086 /* Expand all #pragma omp ordered depend(source) and
8087 #pragma omp ordered depend(sink:...) constructs in the current
8088 #pragma omp for ordered(n) region. */
8089
8090 static void
8091 expand_omp_ordered_source_sink (struct omp_region *region,
8092 struct omp_for_data *fd, tree *counts,
8093 basic_block cont_bb)
8094 {
8095 struct omp_region *inner;
8096 int i;
8097 for (i = fd->collapse - 1; i < fd->ordered; i++)
8098 if (i == fd->collapse - 1 && fd->collapse > 1)
8099 counts[i] = NULL_TREE;
8100 else if (i >= fd->collapse && !cont_bb)
8101 counts[i] = build_zero_cst (fd->iter_type);
8102 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
8103 && integer_onep (fd->loops[i].step))
8104 counts[i] = NULL_TREE;
8105 else
8106 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
8107 tree atype
8108 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
8109 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
8110 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
8111
8112 for (inner = region->inner; inner; inner = inner->next)
8113 if (inner->type == GIMPLE_OMP_ORDERED)
8114 {
8115 gomp_ordered *ord_stmt = inner->ord_stmt;
8116 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
8117 location_t loc = gimple_location (ord_stmt);
8118 tree c;
8119 for (c = gimple_omp_ordered_clauses (ord_stmt);
8120 c; c = OMP_CLAUSE_CHAIN (c))
8121 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
8122 break;
8123 if (c)
8124 expand_omp_ordered_source (&gsi, fd, counts, loc);
8125 for (c = gimple_omp_ordered_clauses (ord_stmt);
8126 c; c = OMP_CLAUSE_CHAIN (c))
8127 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
8128 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
8129 gsi_remove (&gsi, true);
8130 }
8131 }
8132
8133 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
8134 collapsed. */
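/* E.g. (illustration only) for "#pragma omp for ordered(2)" without a
   collapse clause, only the outer i loop is distributed among the
   threads; the inner j loop (fd->loops[1]) is rebuilt here as an
   ordinary sequential loop around the body, updating the doacross
   counter array as it iterates.  */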
8135
8136 static basic_block
8137 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
8138 basic_block cont_bb, basic_block body_bb,
8139 bool ordered_lastprivate)
8140 {
8141 if (fd->ordered == fd->collapse)
8142 return cont_bb;
8143
8144 if (!cont_bb)
8145 {
8146 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
8147 for (int i = fd->collapse; i < fd->ordered; i++)
8148 {
8149 tree type = TREE_TYPE (fd->loops[i].v);
8150 tree n1 = fold_convert (type, fd->loops[i].n1);
8151 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
8152 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8153 size_int (i - fd->collapse + 1),
8154 NULL_TREE, NULL_TREE);
8155 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
8156 }
8157 return NULL;
8158 }
8159
8160 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
8161 {
8162 tree t, type = TREE_TYPE (fd->loops[i].v);
8163 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
8164 expand_omp_build_assign (&gsi, fd->loops[i].v,
8165 fold_convert (type, fd->loops[i].n1));
8166 if (counts[i])
8167 expand_omp_build_assign (&gsi, counts[i],
8168 build_zero_cst (fd->iter_type));
8169 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8170 size_int (i - fd->collapse + 1),
8171 NULL_TREE, NULL_TREE);
8172 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
8173 if (!gsi_end_p (gsi))
8174 gsi_prev (&gsi);
8175 else
8176 gsi = gsi_last_bb (body_bb);
8177 edge e1 = split_block (body_bb, gsi_stmt (gsi));
8178 basic_block new_body = e1->dest;
8179 if (body_bb == cont_bb)
8180 cont_bb = new_body;
8181 edge e2 = NULL;
8182 basic_block new_header;
8183 if (EDGE_COUNT (cont_bb->preds) > 0)
8184 {
8185 gsi = gsi_last_bb (cont_bb);
8186 if (POINTER_TYPE_P (type))
8187 t = fold_build_pointer_plus (fd->loops[i].v,
8188 fold_convert (sizetype,
8189 fd->loops[i].step));
8190 else
8191 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
8192 fold_convert (type, fd->loops[i].step));
8193 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
8194 if (counts[i])
8195 {
8196 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
8197 build_int_cst (fd->iter_type, 1));
8198 expand_omp_build_assign (&gsi, counts[i], t);
8199 t = counts[i];
8200 }
8201 else
8202 {
8203 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
8204 fd->loops[i].v, fd->loops[i].n1);
8205 t = fold_convert (fd->iter_type, t);
8206 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8207 true, GSI_SAME_STMT);
8208 }
8209 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8210 size_int (i - fd->collapse + 1),
8211 NULL_TREE, NULL_TREE);
8212 expand_omp_build_assign (&gsi, aref, t);
8213 gsi_prev (&gsi);
8214 e2 = split_block (cont_bb, gsi_stmt (gsi));
8215 new_header = e2->dest;
8216 }
8217 else
8218 new_header = cont_bb;
8219 gsi = gsi_after_labels (new_header);
8220 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
8221 true, GSI_SAME_STMT);
8222 tree n2
8223 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
8224 true, NULL_TREE, true, GSI_SAME_STMT);
8225 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
8226 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
8227 edge e3 = split_block (new_header, gsi_stmt (gsi));
8228 cont_bb = e3->dest;
8229 remove_edge (e1);
8230 make_edge (body_bb, new_header, EDGE_FALLTHRU);
8231 e3->flags = EDGE_FALSE_VALUE;
8232 e3->probability = REG_BR_PROB_BASE / 8;
8233 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
8234 e1->probability = REG_BR_PROB_BASE - e3->probability;
8235
8236 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
8237 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
8238
8239 if (e2)
8240 {
8241 struct loop *loop = alloc_loop ();
8242 loop->header = new_header;
8243 loop->latch = e2->src;
8244 add_loop (loop, body_bb->loop_father);
8245 }
8246 }
8247
8248 /* If there are any lastprivate clauses and it is possible some loops
8249 might have zero iterations, ensure all the decls are initialized,
8250 otherwise we could crash evaluating C++ class iterators with lastprivate
8251 clauses. */
8252 bool need_inits = false;
8253 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
8254 if (need_inits)
8255 {
8256 tree type = TREE_TYPE (fd->loops[i].v);
8257 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
8258 expand_omp_build_assign (&gsi, fd->loops[i].v,
8259 fold_convert (type, fd->loops[i].n1));
8260 }
8261 else
8262 {
8263 tree type = TREE_TYPE (fd->loops[i].v);
8264 tree this_cond = fold_build2 (fd->loops[i].cond_code,
8265 boolean_type_node,
8266 fold_convert (type, fd->loops[i].n1),
8267 fold_convert (type, fd->loops[i].n2));
8268 if (!integer_onep (this_cond))
8269 need_inits = true;
8270 }
8271
8272 return cont_bb;
8273 }
8274
8275
8276 /* A subroutine of expand_omp_for. Generate code for a parallel
8277 loop with any schedule. Given parameters:
8278
8279 for (V = N1; V cond N2; V += STEP) BODY;
8280
8281 where COND is "<" or ">", we generate pseudocode
8282
8283 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
8284 if (more) goto L0; else goto L3;
8285 L0:
8286 V = istart0;
8287 iend = iend0;
8288 L1:
8289 BODY;
8290 V += STEP;
8291 if (V cond iend) goto L1; else goto L2;
8292 L2:
8293 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
8294 L3:
8295
8296 If this is a combined omp parallel loop, instead of the call to
8297 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
8298 If this is a gimple_omp_for_combined_p loop, then instead of assigning
8299 V and iend in L0 we assign the first two _looptemp_ clause decls of the
8300 inner GIMPLE_OMP_FOR and V += STEP; and
8301 if (V cond iend) goto L1; else goto L2; are removed.
8302
8303 For collapsed loops, given parameters:
8304 collapse(3)
8305 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
8306 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
8307 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
8308 BODY;
8309
8310 we generate pseudocode
8311
8312 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
8313 if (cond3 is <)
8314 adj = STEP3 - 1;
8315 else
8316 adj = STEP3 + 1;
8317 count3 = (adj + N32 - N31) / STEP3;
8318 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
8319 if (cond2 is <)
8320 adj = STEP2 - 1;
8321 else
8322 adj = STEP2 + 1;
8323 count2 = (adj + N22 - N21) / STEP2;
8324 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
8325 if (cond1 is <)
8326 adj = STEP1 - 1;
8327 else
8328 adj = STEP1 + 1;
8329 count1 = (adj + N12 - N11) / STEP1;
8330 count = count1 * count2 * count3;
8331 goto Z1;
8332 Z0:
8333 count = 0;
8334 Z1:
8335 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
8336 if (more) goto L0; else goto L3;
8337 L0:
8338 V = istart0;
8339 T = V;
8340 V3 = N31 + (T % count3) * STEP3;
8341 T = T / count3;
8342 V2 = N21 + (T % count2) * STEP2;
8343 T = T / count2;
8344 V1 = N11 + T * STEP1;
8345 iend = iend0;
8346 L1:
8347 BODY;
8348 V += 1;
8349 if (V < iend) goto L10; else goto L2;
8350 L10:
8351 V3 += STEP3;
8352 if (V3 cond3 N32) goto L1; else goto L11;
8353 L11:
8354 V3 = N31;
8355 V2 += STEP2;
8356 if (V2 cond2 N22) goto L1; else goto L12;
8357 L12:
8358 V2 = N21;
8359 V1 += STEP1;
8360 goto L1;
8361 L2:
8362 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
8363 L3:
8364
8365 */
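/* For instance (not part of the original comment), for a loop with
   schedule(dynamic, CHUNK) the GOMP_loop_foo_start/GOMP_loop_foo_next
   placeholders above stand for the libgomp entry points
   GOMP_loop_dynamic_start and GOMP_loop_dynamic_next (or their _ull_
   counterparts when fd->iter_type is unsigned long long, and the
   doacross variants when an ordered(n) clause is present); START_FN and
   NEXT_FN below carry the corresponding built-in enumerators.  */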
8366
8367 static void
8368 expand_omp_for_generic (struct omp_region *region,
8369 struct omp_for_data *fd,
8370 enum built_in_function start_fn,
8371 enum built_in_function next_fn,
8372 gimple *inner_stmt)
8373 {
8374 tree type, istart0, iend0, iend;
8375 tree t, vmain, vback, bias = NULL_TREE;
8376 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
8377 basic_block l2_bb = NULL, l3_bb = NULL;
8378 gimple_stmt_iterator gsi;
8379 gassign *assign_stmt;
8380 bool in_combined_parallel = is_combined_parallel (region);
8381 bool broken_loop = region->cont == NULL;
8382 edge e, ne;
8383 tree *counts = NULL;
8384 int i;
8385 bool ordered_lastprivate = false;
8386
8387 gcc_assert (!broken_loop || !in_combined_parallel);
8388 gcc_assert (fd->iter_type == long_integer_type_node
8389 || !in_combined_parallel);
8390
8391 entry_bb = region->entry;
8392 cont_bb = region->cont;
8393 collapse_bb = NULL;
8394 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
8395 gcc_assert (broken_loop
8396 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
8397 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
8398 l1_bb = single_succ (l0_bb);
8399 if (!broken_loop)
8400 {
8401 l2_bb = create_empty_bb (cont_bb);
8402 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
8403 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
8404 == l1_bb));
8405 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
8406 }
8407 else
8408 l2_bb = NULL;
8409 l3_bb = BRANCH_EDGE (entry_bb)->dest;
8410 exit_bb = region->exit;
8411
8412 gsi = gsi_last_bb (entry_bb);
8413
8414 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
8415 if (fd->ordered
8416 && find_omp_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
8417 OMP_CLAUSE_LASTPRIVATE))
8418 ordered_lastprivate = true;
8419 if (fd->collapse > 1 || fd->ordered)
8420 {
8421 int first_zero_iter1 = -1, first_zero_iter2 = -1;
8422 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
8423
8424 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
8425 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
8426 zero_iter1_bb, first_zero_iter1,
8427 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
8428
8429 if (zero_iter1_bb)
8430 {
8431 /* Some counts[i] vars might be uninitialized if
8432 some loop has zero iterations. But the body shouldn't
8433 be executed in that case, so just avoid uninit warnings. */
8434 for (i = first_zero_iter1;
8435 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
8436 if (SSA_VAR_P (counts[i]))
8437 TREE_NO_WARNING (counts[i]) = 1;
8438 gsi_prev (&gsi);
8439 e = split_block (entry_bb, gsi_stmt (gsi));
8440 entry_bb = e->dest;
8441 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
8442 gsi = gsi_last_bb (entry_bb);
8443 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
8444 get_immediate_dominator (CDI_DOMINATORS,
8445 zero_iter1_bb));
8446 }
8447 if (zero_iter2_bb)
8448 {
8449 /* Some counts[i] vars might be uninitialized if
8450 some loop has zero iterations. But the body shouldn't
8451 be executed in that case, so just avoid uninit warnings. */
8452 for (i = first_zero_iter2; i < fd->ordered; i++)
8453 if (SSA_VAR_P (counts[i]))
8454 TREE_NO_WARNING (counts[i]) = 1;
8455 if (zero_iter1_bb)
8456 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
8457 else
8458 {
8459 gsi_prev (&gsi);
8460 e = split_block (entry_bb, gsi_stmt (gsi));
8461 entry_bb = e->dest;
8462 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
8463 gsi = gsi_last_bb (entry_bb);
8464 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
8465 get_immediate_dominator
8466 (CDI_DOMINATORS, zero_iter2_bb));
8467 }
8468 }
8469 if (fd->collapse == 1)
8470 {
8471 counts[0] = fd->loop.n2;
8472 fd->loop = fd->loops[0];
8473 }
8474 }
8475
8476 type = TREE_TYPE (fd->loop.v);
8477 istart0 = create_tmp_var (fd->iter_type, ".istart0");
8478 iend0 = create_tmp_var (fd->iter_type, ".iend0");
8479 TREE_ADDRESSABLE (istart0) = 1;
8480 TREE_ADDRESSABLE (iend0) = 1;
8481
8482 /* See if we need to bias by LLONG_MIN. */
8483 if (fd->iter_type == long_long_unsigned_type_node
8484 && TREE_CODE (type) == INTEGER_TYPE
8485 && !TYPE_UNSIGNED (type)
8486 && fd->ordered == 0)
8487 {
8488 tree n1, n2;
8489
8490 if (fd->loop.cond_code == LT_EXPR)
8491 {
8492 n1 = fd->loop.n1;
8493 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
8494 }
8495 else
8496 {
8497 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
8498 n2 = fd->loop.n1;
8499 }
8500 if (TREE_CODE (n1) != INTEGER_CST
8501 || TREE_CODE (n2) != INTEGER_CST
8502 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
8503 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
8504 }
8505
8506 gimple_stmt_iterator gsif = gsi;
8507 gsi_prev (&gsif);
8508
8509 tree arr = NULL_TREE;
8510 if (in_combined_parallel)
8511 {
8512 gcc_assert (fd->ordered == 0);
8513 /* In a combined parallel loop, emit a call to
8514 GOMP_loop_foo_next. */
8515 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
8516 build_fold_addr_expr (istart0),
8517 build_fold_addr_expr (iend0));
8518 }
8519 else
8520 {
8521 tree t0, t1, t2, t3, t4;
8522 /* If this is not a combined parallel loop, emit a call to
8523 GOMP_loop_foo_start in ENTRY_BB. */
8524 t4 = build_fold_addr_expr (iend0);
8525 t3 = build_fold_addr_expr (istart0);
8526 if (fd->ordered)
8527 {
8528 t0 = build_int_cst (unsigned_type_node,
8529 fd->ordered - fd->collapse + 1);
8530 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
8531 fd->ordered
8532 - fd->collapse + 1),
8533 ".omp_counts");
8534 DECL_NAMELESS (arr) = 1;
8535 TREE_ADDRESSABLE (arr) = 1;
8536 TREE_STATIC (arr) = 1;
8537 vec<constructor_elt, va_gc> *v;
8538 vec_alloc (v, fd->ordered - fd->collapse + 1);
8539 int idx;
8540
8541 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
8542 {
8543 tree c;
8544 if (idx == 0 && fd->collapse > 1)
8545 c = fd->loop.n2;
8546 else
8547 c = counts[idx + fd->collapse - 1];
8548 tree purpose = size_int (idx);
8549 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
8550 if (TREE_CODE (c) != INTEGER_CST)
8551 TREE_STATIC (arr) = 0;
8552 }
8553
8554 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
8555 if (!TREE_STATIC (arr))
8556 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
8557 void_type_node, arr),
8558 true, NULL_TREE, true, GSI_SAME_STMT);
8559 t1 = build_fold_addr_expr (arr);
8560 t2 = NULL_TREE;
8561 }
8562 else
8563 {
8564 t2 = fold_convert (fd->iter_type, fd->loop.step);
8565 t1 = fd->loop.n2;
8566 t0 = fd->loop.n1;
8567 if (gimple_omp_for_combined_into_p (fd->for_stmt))
8568 {
8569 tree innerc
8570 = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
8571 OMP_CLAUSE__LOOPTEMP_);
8572 gcc_assert (innerc);
8573 t0 = OMP_CLAUSE_DECL (innerc);
8574 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
8575 OMP_CLAUSE__LOOPTEMP_);
8576 gcc_assert (innerc);
8577 t1 = OMP_CLAUSE_DECL (innerc);
8578 }
8579 if (POINTER_TYPE_P (TREE_TYPE (t0))
8580 && TYPE_PRECISION (TREE_TYPE (t0))
8581 != TYPE_PRECISION (fd->iter_type))
8582 {
8583 /* Avoid casting pointers to integer of a different size. */
8584 tree itype = signed_type_for (type);
8585 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
8586 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
8587 }
8588 else
8589 {
8590 t1 = fold_convert (fd->iter_type, t1);
8591 t0 = fold_convert (fd->iter_type, t0);
8592 }
8593 if (bias)
8594 {
8595 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
8596 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
8597 }
8598 }
8599 if (fd->iter_type == long_integer_type_node || fd->ordered)
8600 {
8601 if (fd->chunk_size)
8602 {
8603 t = fold_convert (fd->iter_type, fd->chunk_size);
8604 t = omp_adjust_chunk_size (t, fd->simd_schedule);
8605 if (fd->ordered)
8606 t = build_call_expr (builtin_decl_explicit (start_fn),
8607 5, t0, t1, t, t3, t4);
8608 else
8609 t = build_call_expr (builtin_decl_explicit (start_fn),
8610 6, t0, t1, t2, t, t3, t4);
8611 }
8612 else if (fd->ordered)
8613 t = build_call_expr (builtin_decl_explicit (start_fn),
8614 4, t0, t1, t3, t4);
8615 else
8616 t = build_call_expr (builtin_decl_explicit (start_fn),
8617 5, t0, t1, t2, t3, t4);
8618 }
8619 else
8620 {
8621 tree t5;
8622 tree c_bool_type;
8623 tree bfn_decl;
8624
8625 /* The GOMP_loop_ull_*start functions have an additional boolean
8626 argument, true for < loops and false for > loops.
8627 In Fortran, the C bool type can be different from
8628 boolean_type_node. */
8629 bfn_decl = builtin_decl_explicit (start_fn);
8630 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
8631 t5 = build_int_cst (c_bool_type,
8632 fd->loop.cond_code == LT_EXPR ? 1 : 0);
8633 if (fd->chunk_size)
8634 {
8635 tree bfn_decl = builtin_decl_explicit (start_fn);
8636 t = fold_convert (fd->iter_type, fd->chunk_size);
8637 t = omp_adjust_chunk_size (t, fd->simd_schedule);
8638 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
8639 }
8640 else
8641 t = build_call_expr (builtin_decl_explicit (start_fn),
8642 6, t5, t0, t1, t2, t3, t4);
8643 }
8644 }
8645 if (TREE_TYPE (t) != boolean_type_node)
8646 t = fold_build2 (NE_EXPR, boolean_type_node,
8647 t, build_int_cst (TREE_TYPE (t), 0));
8648 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8649 true, GSI_SAME_STMT);
8650 if (arr && !TREE_STATIC (arr))
8651 {
8652 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
8653 TREE_THIS_VOLATILE (clobber) = 1;
8654 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
8655 GSI_SAME_STMT);
8656 }
8657 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
8658
8659 /* Remove the GIMPLE_OMP_FOR statement. */
8660 gsi_remove (&gsi, true);
8661
8662 if (gsi_end_p (gsif))
8663 gsif = gsi_after_labels (gsi_bb (gsif));
8664 gsi_next (&gsif);
8665
8666 /* Iteration setup for sequential loop goes in L0_BB. */
8667 tree startvar = fd->loop.v;
8668 tree endvar = NULL_TREE;
8669
8670 if (gimple_omp_for_combined_p (fd->for_stmt))
8671 {
8672 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
8673 && gimple_omp_for_kind (inner_stmt)
8674 == GF_OMP_FOR_KIND_SIMD);
8675 tree innerc = find_omp_clause (gimple_omp_for_clauses (inner_stmt),
8676 OMP_CLAUSE__LOOPTEMP_);
8677 gcc_assert (innerc);
8678 startvar = OMP_CLAUSE_DECL (innerc);
8679 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
8680 OMP_CLAUSE__LOOPTEMP_);
8681 gcc_assert (innerc);
8682 endvar = OMP_CLAUSE_DECL (innerc);
8683 }
8684
8685 gsi = gsi_start_bb (l0_bb);
8686 t = istart0;
8687 if (fd->ordered && fd->collapse == 1)
8688 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
8689 fold_convert (fd->iter_type, fd->loop.step));
8690 else if (bias)
8691 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
8692 if (fd->ordered && fd->collapse == 1)
8693 {
8694 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
8695 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
8696 fd->loop.n1, fold_convert (sizetype, t));
8697 else
8698 {
8699 t = fold_convert (TREE_TYPE (startvar), t);
8700 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
8701 fd->loop.n1, t);
8702 }
8703 }
8704 else
8705 {
8706 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
8707 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
8708 t = fold_convert (TREE_TYPE (startvar), t);
8709 }
8710 t = force_gimple_operand_gsi (&gsi, t,
8711 DECL_P (startvar)
8712 && TREE_ADDRESSABLE (startvar),
8713 NULL_TREE, false, GSI_CONTINUE_LINKING);
8714 assign_stmt = gimple_build_assign (startvar, t);
8715 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
8716
8717 t = iend0;
8718 if (fd->ordered && fd->collapse == 1)
8719 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
8720 fold_convert (fd->iter_type, fd->loop.step));
8721 else if (bias)
8722 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
8723 if (fd->ordered && fd->collapse == 1)
8724 {
8725 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
8726 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
8727 fd->loop.n1, fold_convert (sizetype, t));
8728 else
8729 {
8730 t = fold_convert (TREE_TYPE (startvar), t);
8731 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
8732 fd->loop.n1, t);
8733 }
8734 }
8735 else
8736 {
8737 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
8738 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
8739 t = fold_convert (TREE_TYPE (startvar), t);
8740 }
8741 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8742 false, GSI_CONTINUE_LINKING);
8743 if (endvar)
8744 {
8745 assign_stmt = gimple_build_assign (endvar, iend);
8746 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
8747 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
8748 assign_stmt = gimple_build_assign (fd->loop.v, iend);
8749 else
8750 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
8751 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
8752 }
8753 /* Handle linear clause adjustments. */
8754 tree itercnt = NULL_TREE;
8755 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
8756 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
8757 c; c = OMP_CLAUSE_CHAIN (c))
8758 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
8759 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
8760 {
8761 tree d = OMP_CLAUSE_DECL (c);
8762 bool is_ref = is_reference (d);
8763 tree t = d, a, dest;
8764 if (is_ref)
8765 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
8766 tree type = TREE_TYPE (t);
8767 if (POINTER_TYPE_P (type))
8768 type = sizetype;
8769 dest = unshare_expr (t);
8770 tree v = create_tmp_var (TREE_TYPE (t), NULL);
8771 expand_omp_build_assign (&gsif, v, t);
8772 if (itercnt == NULL_TREE)
8773 {
8774 itercnt = startvar;
8775 tree n1 = fd->loop.n1;
8776 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
8777 {
8778 itercnt
8779 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
8780 itercnt);
8781 n1 = fold_convert (TREE_TYPE (itercnt), n1);
8782 }
8783 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
8784 itercnt, n1);
8785 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
8786 itercnt, fd->loop.step);
8787 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
8788 NULL_TREE, false,
8789 GSI_CONTINUE_LINKING);
8790 }
8791 a = fold_build2 (MULT_EXPR, type,
8792 fold_convert (type, itercnt),
8793 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
8794 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
8795 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
8796 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8797 false, GSI_CONTINUE_LINKING);
8798 assign_stmt = gimple_build_assign (dest, t);
8799 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
8800 }
8801 if (fd->collapse > 1)
8802 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
8803
8804 if (fd->ordered)
8805 {
8806 /* Until now, the counts array contained the number of iterations (or
8807 a variable holding it) for the ith loop. From now on, we need
8808 those counts only for the collapsed loops, and only for the 2nd
8809 through the last collapsed one. Move them one element earlier:
8810 we'll use counts[fd->collapse - 1] for the first source/sink
8811 iteration counter and so on, and counts[fd->ordered]
8812 as the array holding the current counter values for
8813 depend(source). */
8814 if (fd->collapse > 1)
8815 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
8816 if (broken_loop)
8817 {
8818 int i;
8819 for (i = fd->collapse; i < fd->ordered; i++)
8820 {
8821 tree type = TREE_TYPE (fd->loops[i].v);
8822 tree this_cond
8823 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
8824 fold_convert (type, fd->loops[i].n1),
8825 fold_convert (type, fd->loops[i].n2));
8826 if (!integer_onep (this_cond))
8827 break;
8828 }
8829 if (i < fd->ordered)
8830 {
8831 cont_bb
8832 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
8833 add_bb_to_loop (cont_bb, l1_bb->loop_father);
8834 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
8835 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
8836 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
8837 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
8838 make_edge (cont_bb, l1_bb, 0);
8839 l2_bb = create_empty_bb (cont_bb);
8840 broken_loop = false;
8841 }
8842 }
8843 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
8844 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
8845 ordered_lastprivate);
8846 if (counts[fd->collapse - 1])
8847 {
8848 gcc_assert (fd->collapse == 1);
8849 gsi = gsi_last_bb (l0_bb);
8850 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
8851 istart0, true);
8852 gsi = gsi_last_bb (cont_bb);
8853 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
8854 build_int_cst (fd->iter_type, 1));
8855 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
8856 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8857 size_zero_node, NULL_TREE, NULL_TREE);
8858 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
8859 t = counts[fd->collapse - 1];
8860 }
8861 else if (fd->collapse > 1)
8862 t = fd->loop.v;
8863 else
8864 {
8865 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
8866 fd->loops[0].v, fd->loops[0].n1);
8867 t = fold_convert (fd->iter_type, t);
8868 }
8869 gsi = gsi_last_bb (l0_bb);
8870 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
8871 size_zero_node, NULL_TREE, NULL_TREE);
8872 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8873 false, GSI_CONTINUE_LINKING);
8874 expand_omp_build_assign (&gsi, aref, t, true);
8875 }
8876
8877 if (!broken_loop)
8878 {
8879 /* Code to control the increment and predicate for the sequential
8880 loop goes in the CONT_BB. */
8881 gsi = gsi_last_bb (cont_bb);
8882 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
8883 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
8884 vmain = gimple_omp_continue_control_use (cont_stmt);
8885 vback = gimple_omp_continue_control_def (cont_stmt);
8886
8887 if (!gimple_omp_for_combined_p (fd->for_stmt))
8888 {
8889 if (POINTER_TYPE_P (type))
8890 t = fold_build_pointer_plus (vmain, fd->loop.step);
8891 else
8892 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
8893 t = force_gimple_operand_gsi (&gsi, t,
8894 DECL_P (vback)
8895 && TREE_ADDRESSABLE (vback),
8896 NULL_TREE, true, GSI_SAME_STMT);
8897 assign_stmt = gimple_build_assign (vback, t);
8898 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
8899
8900 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
8901 {
8902 if (fd->collapse > 1)
8903 t = fd->loop.v;
8904 else
8905 {
8906 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
8907 fd->loops[0].v, fd->loops[0].n1);
8908 t = fold_convert (fd->iter_type, t);
8909 }
8910 tree aref = build4 (ARRAY_REF, fd->iter_type,
8911 counts[fd->ordered], size_zero_node,
8912 NULL_TREE, NULL_TREE);
8913 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8914 true, GSI_SAME_STMT);
8915 expand_omp_build_assign (&gsi, aref, t);
8916 }
8917
8918 t = build2 (fd->loop.cond_code, boolean_type_node,
8919 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
8920 iend);
8921 gcond *cond_stmt = gimple_build_cond_empty (t);
8922 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
8923 }
8924
8925 /* Remove GIMPLE_OMP_CONTINUE. */
8926 gsi_remove (&gsi, true);
8927
8928 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
8929 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
8930
8931 /* Emit code to get the next parallel iteration in L2_BB. */
8932 gsi = gsi_start_bb (l2_bb);
8933
8934 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
8935 build_fold_addr_expr (istart0),
8936 build_fold_addr_expr (iend0));
8937 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
8938 false, GSI_CONTINUE_LINKING);
8939 if (TREE_TYPE (t) != boolean_type_node)
8940 t = fold_build2 (NE_EXPR, boolean_type_node,
8941 t, build_int_cst (TREE_TYPE (t), 0));
8942 gcond *cond_stmt = gimple_build_cond_empty (t);
8943 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
8944 }
8945
8946 /* Add the loop cleanup function. */
8947 gsi = gsi_last_bb (exit_bb);
8948 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
8949 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
8950 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
8951 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
8952 else
8953 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
8954 gcall *call_stmt = gimple_build_call (t, 0);
8955 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
8956 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
8957 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
8958 if (fd->ordered)
8959 {
8960 tree arr = counts[fd->ordered];
8961 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
8962 TREE_THIS_VOLATILE (clobber) = 1;
8963 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
8964 GSI_SAME_STMT);
8965 }
8966 gsi_remove (&gsi, true);
8967
8968 /* Connect the new blocks. */
8969 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
8970 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
8971
8972 if (!broken_loop)
8973 {
8974 gimple_seq phis;
8975
8976 e = find_edge (cont_bb, l3_bb);
8977 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
8978
8979 phis = phi_nodes (l3_bb);
8980 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
8981 {
8982 gimple *phi = gsi_stmt (gsi);
8983 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
8984 PHI_ARG_DEF_FROM_EDGE (phi, e));
8985 }
8986 remove_edge (e);
8987
8988 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
8989 e = find_edge (cont_bb, l1_bb);
8990 if (e == NULL)
8991 {
8992 e = BRANCH_EDGE (cont_bb);
8993 gcc_assert (single_succ (e->dest) == l1_bb);
8994 }
8995 if (gimple_omp_for_combined_p (fd->for_stmt))
8996 {
8997 remove_edge (e);
8998 e = NULL;
8999 }
9000 else if (fd->collapse > 1)
9001 {
9002 remove_edge (e);
9003 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
9004 }
9005 else
9006 e->flags = EDGE_TRUE_VALUE;
9007 if (e)
9008 {
9009 e->probability = REG_BR_PROB_BASE * 7 / 8;
9010 find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
9011 }
9012 else
9013 {
9014 e = find_edge (cont_bb, l2_bb);
9015 e->flags = EDGE_FALLTHRU;
9016 }
9017 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
9018
9019 if (gimple_in_ssa_p (cfun))
9020 {
9021 /* Add phis to the outer loop that connect to the phis in the inner,
9022 original loop, and move the loop entry value of the inner phi to
9023 the loop entry value of the outer phi. */
9024 gphi_iterator psi;
9025 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
9026 {
9027 source_location locus;
9028 gphi *nphi;
9029 gphi *exit_phi = psi.phi ();
9030
9031 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
9032 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
9033
9034 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
9035 edge latch_to_l1 = find_edge (latch, l1_bb);
9036 gphi *inner_phi
9037 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
9038
9039 tree t = gimple_phi_result (exit_phi);
9040 tree new_res = copy_ssa_name (t, NULL);
9041 nphi = create_phi_node (new_res, l0_bb);
9042
9043 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
9044 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
9045 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
9046 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
9047 add_phi_arg (nphi, t, entry_to_l0, locus);
9048
9049 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
9050 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
9051
9052 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
9053 }
9054 }
9055
9056 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
9057 recompute_dominator (CDI_DOMINATORS, l2_bb));
9058 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
9059 recompute_dominator (CDI_DOMINATORS, l3_bb));
9060 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
9061 recompute_dominator (CDI_DOMINATORS, l0_bb));
9062 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
9063 recompute_dominator (CDI_DOMINATORS, l1_bb));
9064
9065 /* We enter expand_omp_for_generic with a loop. This original loop may
9066 have its own loop struct, or it may be part of an outer loop struct
9067 (which may be the fake loop). */
9068 struct loop *outer_loop = entry_bb->loop_father;
9069 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
9070
9071 add_bb_to_loop (l2_bb, outer_loop);
9072
9073 /* We've added a new loop around the original loop. Allocate the
9074 corresponding loop struct. */
9075 struct loop *new_loop = alloc_loop ();
9076 new_loop->header = l0_bb;
9077 new_loop->latch = l2_bb;
9078 add_loop (new_loop, outer_loop);
9079
9080 /* Allocate a loop structure for the original loop unless we already
9081 had one. */
9082 if (!orig_loop_has_loop_struct
9083 && !gimple_omp_for_combined_p (fd->for_stmt))
9084 {
9085 struct loop *orig_loop = alloc_loop ();
9086 orig_loop->header = l1_bb;
9087 /* The loop may have multiple latches. */
9088 add_loop (orig_loop, new_loop);
9089 }
9090 }
9091 }
9092
9093
9094 /* A subroutine of expand_omp_for. Generate code for a parallel
9095 loop with static schedule and no specified chunk size. Given
9096 parameters:
9097
9098 for (V = N1; V cond N2; V += STEP) BODY;
9099
9100 where COND is "<" or ">", we generate pseudocode
9101
9102 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
9103 if (cond is <)
9104 adj = STEP - 1;
9105 else
9106 adj = STEP + 1;
9107 if ((__typeof (V)) -1 > 0 && cond is >)
9108 n = -(adj + N2 - N1) / -STEP;
9109 else
9110 n = (adj + N2 - N1) / STEP;
9111 q = n / nthreads;
9112 tt = n % nthreads;
9113 if (threadid < tt) goto L3; else goto L4;
9114 L3:
9115 tt = 0;
9116 q = q + 1;
9117 L4:
9118 s0 = q * threadid + tt;
9119 e0 = s0 + q;
9120 V = s0 * STEP + N1;
9121 if (s0 >= e0) goto L2; else goto L0;
9122 L0:
9123 e = e0 * STEP + N1;
9124 L1:
9125 BODY;
9126 V += STEP;
9127 if (V cond e) goto L1;
9128 L2:
9129 */
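/* For example, with nthreads = 4 and n = 10 the computation above gives
   q = 2 and tt = 2, so threads 0 and 1 each execute the q + 1 = 3
   iterations [0, 3) and [3, 6) while threads 2 and 3 execute the two
   iterations [6, 8) and [8, 10); every iteration is assigned to exactly
   one thread and the per-thread counts differ by at most one.  */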
9130
9131 static void
9132 expand_omp_for_static_nochunk (struct omp_region *region,
9133 struct omp_for_data *fd,
9134 gimple *inner_stmt)
9135 {
9136 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
9137 tree type, itype, vmain, vback;
9138 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
9139 basic_block body_bb, cont_bb, collapse_bb = NULL;
9140 basic_block fin_bb;
9141 gimple_stmt_iterator gsi;
9142 edge ep;
9143 bool broken_loop = region->cont == NULL;
9144 tree *counts = NULL;
9145 tree n1, n2, step;
9146
9147 itype = type = TREE_TYPE (fd->loop.v);
9148 if (POINTER_TYPE_P (type))
9149 itype = signed_type_for (type);
9150
9151 entry_bb = region->entry;
9152 cont_bb = region->cont;
9153 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
9154 fin_bb = BRANCH_EDGE (entry_bb)->dest;
9155 gcc_assert (broken_loop
9156 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
9157 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
9158 body_bb = single_succ (seq_start_bb);
9159 if (!broken_loop)
9160 {
9161 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
9162 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
9163 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
9164 }
9165 exit_bb = region->exit;
9166
9167 /* Iteration space partitioning goes in ENTRY_BB. */
9168 gsi = gsi_last_bb (entry_bb);
9169 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9170
9171 if (fd->collapse > 1)
9172 {
9173 int first_zero_iter = -1, dummy = -1;
9174 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
9175
9176 counts = XALLOCAVEC (tree, fd->collapse);
9177 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
9178 fin_bb, first_zero_iter,
9179 dummy_bb, dummy, l2_dom_bb);
9180 t = NULL_TREE;
9181 }
9182 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
9183 t = integer_one_node;
9184 else
9185 t = fold_binary (fd->loop.cond_code, boolean_type_node,
9186 fold_convert (type, fd->loop.n1),
9187 fold_convert (type, fd->loop.n2));
9188 if (fd->collapse == 1
9189 && TYPE_UNSIGNED (type)
9190 && (t == NULL_TREE || !integer_onep (t)))
9191 {
9192 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
9193 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
9194 true, GSI_SAME_STMT);
9195 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
9196 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
9197 true, GSI_SAME_STMT);
9198 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
9199 NULL_TREE, NULL_TREE);
9200 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
9201 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
9202 expand_omp_regimplify_p, NULL, NULL)
9203 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
9204 expand_omp_regimplify_p, NULL, NULL))
9205 {
9206 gsi = gsi_for_stmt (cond_stmt);
9207 gimple_regimplify_operands (cond_stmt, &gsi);
9208 }
9209 ep = split_block (entry_bb, cond_stmt);
9210 ep->flags = EDGE_TRUE_VALUE;
9211 entry_bb = ep->dest;
9212 ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
9213 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
9214 ep->probability = REG_BR_PROB_BASE / 2000 - 1;
9215 if (gimple_in_ssa_p (cfun))
9216 {
9217 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
9218 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
9219 !gsi_end_p (gpi); gsi_next (&gpi))
9220 {
9221 gphi *phi = gpi.phi ();
9222 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
9223 ep, UNKNOWN_LOCATION);
9224 }
9225 }
9226 gsi = gsi_last_bb (entry_bb);
9227 }
9228
9229 switch (gimple_omp_for_kind (fd->for_stmt))
9230 {
9231 case GF_OMP_FOR_KIND_FOR:
9232 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
9233 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
9234 break;
9235 case GF_OMP_FOR_KIND_DISTRIBUTE:
9236 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
9237 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
9238 break;
9239 default:
9240 gcc_unreachable ();
9241 }
9242 nthreads = build_call_expr (nthreads, 0);
9243 nthreads = fold_convert (itype, nthreads);
9244 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
9245 true, GSI_SAME_STMT);
9246 threadid = build_call_expr (threadid, 0);
9247 threadid = fold_convert (itype, threadid);
9248 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
9249 true, GSI_SAME_STMT);
9250
9251 n1 = fd->loop.n1;
9252 n2 = fd->loop.n2;
9253 step = fd->loop.step;
9254 if (gimple_omp_for_combined_into_p (fd->for_stmt))
9255 {
9256 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
9257 OMP_CLAUSE__LOOPTEMP_);
9258 gcc_assert (innerc);
9259 n1 = OMP_CLAUSE_DECL (innerc);
9260 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9261 OMP_CLAUSE__LOOPTEMP_);
9262 gcc_assert (innerc);
9263 n2 = OMP_CLAUSE_DECL (innerc);
9264 }
9265 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
9266 true, NULL_TREE, true, GSI_SAME_STMT);
9267 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
9268 true, NULL_TREE, true, GSI_SAME_STMT);
9269 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
9270 true, NULL_TREE, true, GSI_SAME_STMT);
9271
9272 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
9273 t = fold_build2 (PLUS_EXPR, itype, step, t);
9274 t = fold_build2 (PLUS_EXPR, itype, t, n2);
9275 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
9276 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
9277 t = fold_build2 (TRUNC_DIV_EXPR, itype,
9278 fold_build1 (NEGATE_EXPR, itype, t),
9279 fold_build1 (NEGATE_EXPR, itype, step));
9280 else
9281 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
9282 t = fold_convert (itype, t);
9283 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
9284
9285 q = create_tmp_reg (itype, "q");
9286 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
9287 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
9288 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
9289
9290 tt = create_tmp_reg (itype, "tt");
9291 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
9292 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
9293 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
9294
9295 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
9296 gcond *cond_stmt = gimple_build_cond_empty (t);
9297 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
9298
9299 second_bb = split_block (entry_bb, cond_stmt)->dest;
9300 gsi = gsi_last_bb (second_bb);
9301 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9302
9303 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
9304 GSI_SAME_STMT);
9305 gassign *assign_stmt
9306 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
9307 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9308
9309 third_bb = split_block (second_bb, assign_stmt)->dest;
9310 gsi = gsi_last_bb (third_bb);
9311 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9312
9313 t = build2 (MULT_EXPR, itype, q, threadid);
9314 t = build2 (PLUS_EXPR, itype, t, tt);
9315 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
9316
9317 t = fold_build2 (PLUS_EXPR, itype, s0, q);
9318 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
9319
9320 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
9321 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
9322
9323 /* Remove the GIMPLE_OMP_FOR statement. */
9324 gsi_remove (&gsi, true);
9325
9326 /* Setup code for sequential iteration goes in SEQ_START_BB. */
9327 gsi = gsi_start_bb (seq_start_bb);
9328
9329 tree startvar = fd->loop.v;
9330 tree endvar = NULL_TREE;
9331
9332 if (gimple_omp_for_combined_p (fd->for_stmt))
9333 {
9334 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
9335 ? gimple_omp_parallel_clauses (inner_stmt)
9336 : gimple_omp_for_clauses (inner_stmt);
9337 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
9338 gcc_assert (innerc);
9339 startvar = OMP_CLAUSE_DECL (innerc);
9340 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9341 OMP_CLAUSE__LOOPTEMP_);
9342 gcc_assert (innerc);
9343 endvar = OMP_CLAUSE_DECL (innerc);
9344 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
9345 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
9346 {
9347 int i;
9348 for (i = 1; i < fd->collapse; i++)
9349 {
9350 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9351 OMP_CLAUSE__LOOPTEMP_);
9352 gcc_assert (innerc);
9353 }
9354 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9355 OMP_CLAUSE__LOOPTEMP_);
9356 if (innerc)
9357 {
9358 /* If needed (distribute parallel for with lastprivate),
9359 propagate down the total number of iterations. */
9360 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
9361 fd->loop.n2);
9362 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
9363 GSI_CONTINUE_LINKING);
9364 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
9365 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9366 }
9367 }
9368 }
9369 t = fold_convert (itype, s0);
9370 t = fold_build2 (MULT_EXPR, itype, t, step);
9371 if (POINTER_TYPE_P (type))
9372 t = fold_build_pointer_plus (n1, t);
9373 else
9374 t = fold_build2 (PLUS_EXPR, type, t, n1);
9375 t = fold_convert (TREE_TYPE (startvar), t);
9376 t = force_gimple_operand_gsi (&gsi, t,
9377 DECL_P (startvar)
9378 && TREE_ADDRESSABLE (startvar),
9379 NULL_TREE, false, GSI_CONTINUE_LINKING);
9380 assign_stmt = gimple_build_assign (startvar, t);
9381 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9382
9383 t = fold_convert (itype, e0);
9384 t = fold_build2 (MULT_EXPR, itype, t, step);
9385 if (POINTER_TYPE_P (type))
9386 t = fold_build_pointer_plus (n1, t);
9387 else
9388 t = fold_build2 (PLUS_EXPR, type, t, n1);
9389 t = fold_convert (TREE_TYPE (startvar), t);
9390 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9391 false, GSI_CONTINUE_LINKING);
9392 if (endvar)
9393 {
9394 assign_stmt = gimple_build_assign (endvar, e);
9395 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9396 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
9397 assign_stmt = gimple_build_assign (fd->loop.v, e);
9398 else
9399 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
9400 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9401 }
9402 /* Handle linear clause adjustments. */
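/* For instance, for a clause such as linear(x:2) the thread whose chunk
   starts at logical iteration s0 must enter its first iteration with
   x = x0 + s0 * 2, where x0 is the value of x on entry to the construct;
   ITERCNT below is that logical iteration number (S0, rebiased for
   combined constructs) and the final assignment to DEST performs the
   adjustment.  */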
9403 tree itercnt = NULL_TREE;
9404 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
9405 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
9406 c; c = OMP_CLAUSE_CHAIN (c))
9407 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
9408 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
9409 {
9410 tree d = OMP_CLAUSE_DECL (c);
9411 bool is_ref = is_reference (d);
9412 tree t = d, a, dest;
9413 if (is_ref)
9414 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
9415 if (itercnt == NULL_TREE)
9416 {
9417 if (gimple_omp_for_combined_into_p (fd->for_stmt))
9418 {
9419 itercnt = fold_build2 (MINUS_EXPR, itype,
9420 fold_convert (itype, n1),
9421 fold_convert (itype, fd->loop.n1));
9422 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
9423 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
9424 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
9425 NULL_TREE, false,
9426 GSI_CONTINUE_LINKING);
9427 }
9428 else
9429 itercnt = s0;
9430 }
9431 tree type = TREE_TYPE (t);
9432 if (POINTER_TYPE_P (type))
9433 type = sizetype;
9434 a = fold_build2 (MULT_EXPR, type,
9435 fold_convert (type, itercnt),
9436 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
9437 dest = unshare_expr (t);
9438 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
9439 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
9440 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9441 false, GSI_CONTINUE_LINKING);
9442 assign_stmt = gimple_build_assign (dest, t);
9443 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9444 }
9445 if (fd->collapse > 1)
9446 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
9447
9448 if (!broken_loop)
9449 {
9450 /* The code controlling the sequential loop replaces the
9451 GIMPLE_OMP_CONTINUE. */
9452 gsi = gsi_last_bb (cont_bb);
9453 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
9454 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
9455 vmain = gimple_omp_continue_control_use (cont_stmt);
9456 vback = gimple_omp_continue_control_def (cont_stmt);
9457
9458 if (!gimple_omp_for_combined_p (fd->for_stmt))
9459 {
9460 if (POINTER_TYPE_P (type))
9461 t = fold_build_pointer_plus (vmain, step);
9462 else
9463 t = fold_build2 (PLUS_EXPR, type, vmain, step);
9464 t = force_gimple_operand_gsi (&gsi, t,
9465 DECL_P (vback)
9466 && TREE_ADDRESSABLE (vback),
9467 NULL_TREE, true, GSI_SAME_STMT);
9468 assign_stmt = gimple_build_assign (vback, t);
9469 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9470
9471 t = build2 (fd->loop.cond_code, boolean_type_node,
9472 DECL_P (vback) && TREE_ADDRESSABLE (vback)
9473 ? t : vback, e);
9474 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
9475 }
9476
9477 /* Remove the GIMPLE_OMP_CONTINUE statement. */
9478 gsi_remove (&gsi, true);
9479
9480 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
9481 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
9482 }
9483
9484 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
9485 gsi = gsi_last_bb (exit_bb);
9486 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
9487 {
9488 t = gimple_omp_return_lhs (gsi_stmt (gsi));
9489 gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
9490 }
9491 gsi_remove (&gsi, true);
9492
9493 /* Connect all the blocks. */
9494 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
9495 ep->probability = REG_BR_PROB_BASE / 4 * 3;
9496 ep = find_edge (entry_bb, second_bb);
9497 ep->flags = EDGE_TRUE_VALUE;
9498 ep->probability = REG_BR_PROB_BASE / 4;
9499 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
9500 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
9501
9502 if (!broken_loop)
9503 {
9504 ep = find_edge (cont_bb, body_bb);
9505 if (ep == NULL)
9506 {
9507 ep = BRANCH_EDGE (cont_bb);
9508 gcc_assert (single_succ (ep->dest) == body_bb);
9509 }
9510 if (gimple_omp_for_combined_p (fd->for_stmt))
9511 {
9512 remove_edge (ep);
9513 ep = NULL;
9514 }
9515 else if (fd->collapse > 1)
9516 {
9517 remove_edge (ep);
9518 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
9519 }
9520 else
9521 ep->flags = EDGE_TRUE_VALUE;
9522 find_edge (cont_bb, fin_bb)->flags
9523 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
9524 }
9525
9526 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
9527 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
9528 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
9529
9530 set_immediate_dominator (CDI_DOMINATORS, body_bb,
9531 recompute_dominator (CDI_DOMINATORS, body_bb));
9532 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
9533 recompute_dominator (CDI_DOMINATORS, fin_bb));
9534
9535 struct loop *loop = body_bb->loop_father;
9536 if (loop != entry_bb->loop_father)
9537 {
9538 gcc_assert (loop->header == body_bb);
9539 gcc_assert (broken_loop
9540 || loop->latch == region->cont
9541 || single_pred (loop->latch) == region->cont);
9542 return;
9543 }
9544
9545 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
9546 {
9547 loop = alloc_loop ();
9548 loop->header = body_bb;
9549 if (collapse_bb == NULL)
9550 loop->latch = cont_bb;
9551 add_loop (loop, body_bb->loop_father);
9552 }
9553 }
9554
9555 /* Return the phi in E->DEST whose argument on edge E is ARG, or NULL. */
9556
9557 static gphi *
9558 find_phi_with_arg_on_edge (tree arg, edge e)
9559 {
9560 basic_block bb = e->dest;
9561
9562 for (gphi_iterator gpi = gsi_start_phis (bb);
9563 !gsi_end_p (gpi);
9564 gsi_next (&gpi))
9565 {
9566 gphi *phi = gpi.phi ();
9567 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
9568 return phi;
9569 }
9570
9571 return NULL;
9572 }
9573
9574 /* A subroutine of expand_omp_for. Generate code for a parallel
9575 loop with static schedule and a specified chunk size. Given
9576 parameters:
9577
9578 for (V = N1; V cond N2; V += STEP) BODY;
9579
9580 where COND is "<" or ">", we generate pseudocode
9581
9582 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
9583 if (cond is <)
9584 adj = STEP - 1;
9585 else
9586 adj = STEP + 1;
9587 if ((__typeof (V)) -1 > 0 && cond is >)
9588 n = -(adj + N2 - N1) / -STEP;
9589 else
9590 n = (adj + N2 - N1) / STEP;
9591 trip = 0;
9592 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
9593 here so that V is defined
9594 if the loop is not entered
9595 L0:
9596 s0 = (trip * nthreads + threadid) * CHUNK;
9597 e0 = min(s0 + CHUNK, n);
9598 if (s0 < n) goto L1; else goto L4;
9599 L1:
9600 V = s0 * STEP + N1;
9601 e = e0 * STEP + N1;
9602 L2:
9603 BODY;
9604 V += STEP;
9605 if (V cond e) goto L2; else goto L3;
9606 L3:
9607 trip += 1;
9608 goto L0;
9609 L4:
9610 */
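/* For example, with nthreads = 2, CHUNK = 3 and n = 10 the pseudocode
   above gives thread 0 the ranges [0, 3) on trip 0 and [6, 9) on trip 1,
   and thread 1 the ranges [3, 6) on trip 0 and [9, 10) on trip 1; on
   trip 2 both threads compute s0 >= n and leave through L4.  */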
9611
9612 static void
9613 expand_omp_for_static_chunk (struct omp_region *region,
9614 struct omp_for_data *fd, gimple *inner_stmt)
9615 {
9616 tree n, s0, e0, e, t;
9617 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
9618 tree type, itype, vmain, vback, vextra;
9619 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
9620 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
9621 gimple_stmt_iterator gsi;
9622 edge se;
9623 bool broken_loop = region->cont == NULL;
9624 tree *counts = NULL;
9625 tree n1, n2, step;
9626
9627 itype = type = TREE_TYPE (fd->loop.v);
9628 if (POINTER_TYPE_P (type))
9629 itype = signed_type_for (type);
9630
9631 entry_bb = region->entry;
9632 se = split_block (entry_bb, last_stmt (entry_bb));
9633 entry_bb = se->src;
9634 iter_part_bb = se->dest;
9635 cont_bb = region->cont;
9636 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
9637 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
9638 gcc_assert (broken_loop
9639 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
9640 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
9641 body_bb = single_succ (seq_start_bb);
9642 if (!broken_loop)
9643 {
9644 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
9645 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
9646 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
9647 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
9648 }
9649 exit_bb = region->exit;
9650
9651 /* Trip and adjustment setup goes in ENTRY_BB. */
9652 gsi = gsi_last_bb (entry_bb);
9653 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
9654
9655 if (fd->collapse > 1)
9656 {
9657 int first_zero_iter = -1, dummy = -1;
9658 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
9659
9660 counts = XALLOCAVEC (tree, fd->collapse);
9661 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
9662 fin_bb, first_zero_iter,
9663 dummy_bb, dummy, l2_dom_bb);
9664 t = NULL_TREE;
9665 }
9666 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
9667 t = integer_one_node;
9668 else
9669 t = fold_binary (fd->loop.cond_code, boolean_type_node,
9670 fold_convert (type, fd->loop.n1),
9671 fold_convert (type, fd->loop.n2));
9672 if (fd->collapse == 1
9673 && TYPE_UNSIGNED (type)
9674 && (t == NULL_TREE || !integer_onep (t)))
9675 {
9676 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
9677 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
9678 true, GSI_SAME_STMT);
9679 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
9680 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
9681 true, GSI_SAME_STMT);
9682 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
9683 NULL_TREE, NULL_TREE);
9684 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
9685 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
9686 expand_omp_regimplify_p, NULL, NULL)
9687 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
9688 expand_omp_regimplify_p, NULL, NULL))
9689 {
9690 gsi = gsi_for_stmt (cond_stmt);
9691 gimple_regimplify_operands (cond_stmt, &gsi);
9692 }
9693 se = split_block (entry_bb, cond_stmt);
9694 se->flags = EDGE_TRUE_VALUE;
9695 entry_bb = se->dest;
9696 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
9697 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
9698 se->probability = REG_BR_PROB_BASE / 2000 - 1;
9699 if (gimple_in_ssa_p (cfun))
9700 {
9701 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
9702 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
9703 !gsi_end_p (gpi); gsi_next (&gpi))
9704 {
9705 gphi *phi = gpi.phi ();
9706 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
9707 se, UNKNOWN_LOCATION);
9708 }
9709 }
9710 gsi = gsi_last_bb (entry_bb);
9711 }
9712
9713 switch (gimple_omp_for_kind (fd->for_stmt))
9714 {
9715 case GF_OMP_FOR_KIND_FOR:
9716 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
9717 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
9718 break;
9719 case GF_OMP_FOR_KIND_DISTRIBUTE:
9720 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
9721 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
9722 break;
9723 default:
9724 gcc_unreachable ();
9725 }
9726 nthreads = build_call_expr (nthreads, 0);
9727 nthreads = fold_convert (itype, nthreads);
9728 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
9729 true, GSI_SAME_STMT);
9730 threadid = build_call_expr (threadid, 0);
9731 threadid = fold_convert (itype, threadid);
9732 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
9733 true, GSI_SAME_STMT);
9734
9735 n1 = fd->loop.n1;
9736 n2 = fd->loop.n2;
9737 step = fd->loop.step;
9738 if (gimple_omp_for_combined_into_p (fd->for_stmt))
9739 {
9740 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
9741 OMP_CLAUSE__LOOPTEMP_);
9742 gcc_assert (innerc);
9743 n1 = OMP_CLAUSE_DECL (innerc);
9744 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9745 OMP_CLAUSE__LOOPTEMP_);
9746 gcc_assert (innerc);
9747 n2 = OMP_CLAUSE_DECL (innerc);
9748 }
9749 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
9750 true, NULL_TREE, true, GSI_SAME_STMT);
9751 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
9752 true, NULL_TREE, true, GSI_SAME_STMT);
9753 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
9754 true, NULL_TREE, true, GSI_SAME_STMT);
9755 tree chunk_size = fold_convert (itype, fd->chunk_size);
9756 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
9757 chunk_size
9758 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
9759 GSI_SAME_STMT);
9760
9761 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
9762 t = fold_build2 (PLUS_EXPR, itype, step, t);
9763 t = fold_build2 (PLUS_EXPR, itype, t, n2);
9764 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
9765 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
9766 t = fold_build2 (TRUNC_DIV_EXPR, itype,
9767 fold_build1 (NEGATE_EXPR, itype, t),
9768 fold_build1 (NEGATE_EXPR, itype, step));
9769 else
9770 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
9771 t = fold_convert (itype, t);
9772 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9773 true, GSI_SAME_STMT);
9774
9775 trip_var = create_tmp_reg (itype, ".trip");
9776 if (gimple_in_ssa_p (cfun))
9777 {
9778 trip_init = make_ssa_name (trip_var);
9779 trip_main = make_ssa_name (trip_var);
9780 trip_back = make_ssa_name (trip_var);
9781 }
9782 else
9783 {
9784 trip_init = trip_var;
9785 trip_main = trip_var;
9786 trip_back = trip_var;
9787 }
9788
9789 gassign *assign_stmt
9790 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
9791 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9792
9793 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
9794 t = fold_build2 (MULT_EXPR, itype, t, step);
9795 if (POINTER_TYPE_P (type))
9796 t = fold_build_pointer_plus (n1, t);
9797 else
9798 t = fold_build2 (PLUS_EXPR, type, t, n1);
9799 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9800 true, GSI_SAME_STMT);
9801
9802 /* Remove the GIMPLE_OMP_FOR. */
9803 gsi_remove (&gsi, true);
9804
9805 gimple_stmt_iterator gsif = gsi;
9806
9807 /* Iteration space partitioning goes in ITER_PART_BB. */
9808 gsi = gsi_last_bb (iter_part_bb);
9809
9810 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
9811 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
9812 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
9813 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9814 false, GSI_CONTINUE_LINKING);
9815
9816 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
9817 t = fold_build2 (MIN_EXPR, itype, t, n);
9818 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9819 false, GSI_CONTINUE_LINKING);
9820
9821 t = build2 (LT_EXPR, boolean_type_node, s0, n);
9822 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
9823
9824 /* Setup code for sequential iteration goes in SEQ_START_BB. */
9825 gsi = gsi_start_bb (seq_start_bb);
9826
9827 tree startvar = fd->loop.v;
9828 tree endvar = NULL_TREE;
9829
9830 if (gimple_omp_for_combined_p (fd->for_stmt))
9831 {
9832 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
9833 ? gimple_omp_parallel_clauses (inner_stmt)
9834 : gimple_omp_for_clauses (inner_stmt);
9835 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
9836 gcc_assert (innerc);
9837 startvar = OMP_CLAUSE_DECL (innerc);
9838 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9839 OMP_CLAUSE__LOOPTEMP_);
9840 gcc_assert (innerc);
9841 endvar = OMP_CLAUSE_DECL (innerc);
9842 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
9843 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
9844 {
9845 int i;
9846 for (i = 1; i < fd->collapse; i++)
9847 {
9848 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9849 OMP_CLAUSE__LOOPTEMP_);
9850 gcc_assert (innerc);
9851 }
9852 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
9853 OMP_CLAUSE__LOOPTEMP_);
9854 if (innerc)
9855 {
9856 /* If needed (distribute parallel for with lastprivate),
9857 propagate down the total number of iterations. */
9858 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
9859 fd->loop.n2);
9860 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
9861 GSI_CONTINUE_LINKING);
9862 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
9863 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9864 }
9865 }
9866 }
9867
9868 t = fold_convert (itype, s0);
9869 t = fold_build2 (MULT_EXPR, itype, t, step);
9870 if (POINTER_TYPE_P (type))
9871 t = fold_build_pointer_plus (n1, t);
9872 else
9873 t = fold_build2 (PLUS_EXPR, type, t, n1);
9874 t = fold_convert (TREE_TYPE (startvar), t);
9875 t = force_gimple_operand_gsi (&gsi, t,
9876 DECL_P (startvar)
9877 && TREE_ADDRESSABLE (startvar),
9878 NULL_TREE, false, GSI_CONTINUE_LINKING);
9879 assign_stmt = gimple_build_assign (startvar, t);
9880 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9881
9882 t = fold_convert (itype, e0);
9883 t = fold_build2 (MULT_EXPR, itype, t, step);
9884 if (POINTER_TYPE_P (type))
9885 t = fold_build_pointer_plus (n1, t);
9886 else
9887 t = fold_build2 (PLUS_EXPR, type, t, n1);
9888 t = fold_convert (TREE_TYPE (startvar), t);
9889 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9890 false, GSI_CONTINUE_LINKING);
9891 if (endvar)
9892 {
9893 assign_stmt = gimple_build_assign (endvar, e);
9894 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9895 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
9896 assign_stmt = gimple_build_assign (fd->loop.v, e);
9897 else
9898 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
9899 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9900 }
9901 /* Handle linear clause adjustments. */
9902 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
9903 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
9904 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
9905 c; c = OMP_CLAUSE_CHAIN (c))
9906 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
9907 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
9908 {
9909 tree d = OMP_CLAUSE_DECL (c);
9910 bool is_ref = is_reference (d);
9911 tree t = d, a, dest;
9912 if (is_ref)
9913 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
9914 tree type = TREE_TYPE (t);
9915 if (POINTER_TYPE_P (type))
9916 type = sizetype;
9917 dest = unshare_expr (t);
9918 tree v = create_tmp_var (TREE_TYPE (t), NULL);
9919 expand_omp_build_assign (&gsif, v, t);
9920 if (itercnt == NULL_TREE)
9921 {
9922 if (gimple_omp_for_combined_into_p (fd->for_stmt))
9923 {
9924 itercntbias
9925 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
9926 fold_convert (itype, fd->loop.n1));
9927 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
9928 itercntbias, step);
9929 itercntbias
9930 = force_gimple_operand_gsi (&gsif, itercntbias, true,
9931 NULL_TREE, true,
9932 GSI_SAME_STMT);
9933 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
9934 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
9935 NULL_TREE, false,
9936 GSI_CONTINUE_LINKING);
9937 }
9938 else
9939 itercnt = s0;
9940 }
9941 a = fold_build2 (MULT_EXPR, type,
9942 fold_convert (type, itercnt),
9943 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
9944 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
9945 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
9946 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9947 false, GSI_CONTINUE_LINKING);
9948 assign_stmt = gimple_build_assign (dest, t);
9949 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9950 }
9951 if (fd->collapse > 1)
9952 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
9953
9954 if (!broken_loop)
9955 {
9956 /* The code controlling the sequential loop goes in CONT_BB,
9957 replacing the GIMPLE_OMP_CONTINUE. */
9958 gsi = gsi_last_bb (cont_bb);
9959 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
9960 vmain = gimple_omp_continue_control_use (cont_stmt);
9961 vback = gimple_omp_continue_control_def (cont_stmt);
9962
9963 if (!gimple_omp_for_combined_p (fd->for_stmt))
9964 {
9965 if (POINTER_TYPE_P (type))
9966 t = fold_build_pointer_plus (vmain, step);
9967 else
9968 t = fold_build2 (PLUS_EXPR, type, vmain, step);
9969 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
9970 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
9971 true, GSI_SAME_STMT);
9972 assign_stmt = gimple_build_assign (vback, t);
9973 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
9974
9975 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
9976 t = build2 (EQ_EXPR, boolean_type_node,
9977 build_int_cst (itype, 0),
9978 build_int_cst (itype, 1));
9979 else
9980 t = build2 (fd->loop.cond_code, boolean_type_node,
9981 DECL_P (vback) && TREE_ADDRESSABLE (vback)
9982 ? t : vback, e);
9983 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
9984 }
9985
9986 /* Remove GIMPLE_OMP_CONTINUE. */
9987 gsi_remove (&gsi, true);
9988
9989 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
9990 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
9991
9992 /* Trip update code goes into TRIP_UPDATE_BB. */
9993 gsi = gsi_start_bb (trip_update_bb);
9994
9995 t = build_int_cst (itype, 1);
9996 t = build2 (PLUS_EXPR, itype, trip_main, t);
9997 assign_stmt = gimple_build_assign (trip_back, t);
9998 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
9999 }
10000
10001 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
10002 gsi = gsi_last_bb (exit_bb);
10003 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
10004 {
10005 t = gimple_omp_return_lhs (gsi_stmt (gsi));
10006 gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT);
10007 }
10008 gsi_remove (&gsi, true);
10009
10010 /* Connect the new blocks. */
10011 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
10012 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
10013
10014 if (!broken_loop)
10015 {
10016 se = find_edge (cont_bb, body_bb);
10017 if (se == NULL)
10018 {
10019 se = BRANCH_EDGE (cont_bb);
10020 gcc_assert (single_succ (se->dest) == body_bb);
10021 }
10022 if (gimple_omp_for_combined_p (fd->for_stmt))
10023 {
10024 remove_edge (se);
10025 se = NULL;
10026 }
10027 else if (fd->collapse > 1)
10028 {
10029 remove_edge (se);
10030 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
10031 }
10032 else
10033 se->flags = EDGE_TRUE_VALUE;
10034 find_edge (cont_bb, trip_update_bb)->flags
10035 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
10036
10037 redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb);
10038 }
10039
10040 if (gimple_in_ssa_p (cfun))
10041 {
10042 gphi_iterator psi;
10043 gphi *phi;
10044 edge re, ene;
10045 edge_var_map *vm;
10046 size_t i;
10047
10048 gcc_assert (fd->collapse == 1 && !broken_loop);
10049
10050 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
10051 remove arguments of the phi nodes in fin_bb. We need to create
10052 appropriate phi nodes in iter_part_bb instead. */
10053 se = find_edge (iter_part_bb, fin_bb);
10054 re = single_succ_edge (trip_update_bb);
10055 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
10056 ene = single_succ_edge (entry_bb);
10057
10058 psi = gsi_start_phis (fin_bb);
10059 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
10060 gsi_next (&psi), ++i)
10061 {
10062 gphi *nphi;
10063 source_location locus;
10064
10065 phi = psi.phi ();
10066 t = gimple_phi_result (phi);
10067 gcc_assert (t == redirect_edge_var_map_result (vm));
10068
10069 if (!single_pred_p (fin_bb))
10070 t = copy_ssa_name (t, phi);
10071
10072 nphi = create_phi_node (t, iter_part_bb);
10073
10074 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
10075 locus = gimple_phi_arg_location_from_edge (phi, se);
10076
10077 /* A special case -- fd->loop.v is not yet computed in
10078 iter_part_bb; we need to use vextra instead. */
10079 if (t == fd->loop.v)
10080 t = vextra;
10081 add_phi_arg (nphi, t, ene, locus);
10082 locus = redirect_edge_var_map_location (vm);
10083 tree back_arg = redirect_edge_var_map_def (vm);
10084 add_phi_arg (nphi, back_arg, re, locus);
10085 edge ce = find_edge (cont_bb, body_bb);
10086 if (ce == NULL)
10087 {
10088 ce = BRANCH_EDGE (cont_bb);
10089 gcc_assert (single_succ (ce->dest) == body_bb);
10090 ce = single_succ_edge (ce->dest);
10091 }
10092 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
10093 gcc_assert (inner_loop_phi != NULL);
10094 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
10095 find_edge (seq_start_bb, body_bb), locus);
10096
10097 if (!single_pred_p (fin_bb))
10098 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
10099 }
10100 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
10101 redirect_edge_var_map_clear (re);
10102 if (single_pred_p (fin_bb))
10103 while (1)
10104 {
10105 psi = gsi_start_phis (fin_bb);
10106 if (gsi_end_p (psi))
10107 break;
10108 remove_phi_node (&psi, false);
10109 }
10110
10111 /* Make phi node for trip. */
10112 phi = create_phi_node (trip_main, iter_part_bb);
10113 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
10114 UNKNOWN_LOCATION);
10115 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
10116 UNKNOWN_LOCATION);
10117 }
10118
10119 if (!broken_loop)
10120 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
10121 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
10122 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
10123 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
10124 recompute_dominator (CDI_DOMINATORS, fin_bb));
10125 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
10126 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
10127 set_immediate_dominator (CDI_DOMINATORS, body_bb,
10128 recompute_dominator (CDI_DOMINATORS, body_bb));
10129
10130 if (!broken_loop)
10131 {
10132 struct loop *loop = body_bb->loop_father;
10133 struct loop *trip_loop = alloc_loop ();
10134 trip_loop->header = iter_part_bb;
10135 trip_loop->latch = trip_update_bb;
10136 add_loop (trip_loop, iter_part_bb->loop_father);
10137
10138 if (loop != entry_bb->loop_father)
10139 {
10140 gcc_assert (loop->header == body_bb);
10141 gcc_assert (loop->latch == region->cont
10142 || single_pred (loop->latch) == region->cont);
10143 trip_loop->inner = loop;
10144 return;
10145 }
10146
10147 if (!gimple_omp_for_combined_p (fd->for_stmt))
10148 {
10149 loop = alloc_loop ();
10150 loop->header = body_bb;
10151 if (collapse_bb == NULL)
10152 loop->latch = cont_bb;
10153 add_loop (loop, trip_loop);
10154 }
10155 }
10156 }
10157
10158 /* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
10159 Given parameters:
10160 for (V = N1; V cond N2; V += STEP) BODY;
10161
10162 where COND is "<" or ">" or "!=", we generate pseudocode
10163
10164 for (ind_var = low; ind_var < high; ind_var++)
10165 {
10166 V = n1 + (ind_var * STEP)
10167
10168 <BODY>
10169 }
10170
10171 In the above pseudocode, low and high are function parameters of the
10172 child function. In the function below, we insert a temporary
10173 variable that makes calls to two OMP functions that will not be
10174 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
10175 with _Cilk_for). These functions are replaced with low and high
10176 by the function that handles taskreg. */
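/* For example, a loop written as

   _Cilk_for (V = N1; V < N2; V += STEP) BODY;

   is outlined into a child function that the Cilk runtime invokes with
   __low and __high describing the sub-range a worker should run; the
   code generated below iterates ind_var from __low up to __high and
   recomputes V as N1 + ind_var * STEP on every iteration.  */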
10177
10178
10179 static void
10180 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
10181 {
10182 bool broken_loop = region->cont == NULL;
10183 basic_block entry_bb = region->entry;
10184 basic_block cont_bb = region->cont;
10185
10186 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
10187 gcc_assert (broken_loop
10188 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
10189 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
10190 basic_block l1_bb, l2_bb;
10191
10192 if (!broken_loop)
10193 {
10194 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
10195 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
10196 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
10197 l2_bb = BRANCH_EDGE (entry_bb)->dest;
10198 }
10199 else
10200 {
10201 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
10202 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
10203 l2_bb = single_succ (l1_bb);
10204 }
10205 basic_block exit_bb = region->exit;
10206 basic_block l2_dom_bb = NULL;
10207
10208 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
10209
10210 /* The statements below, up to the "tree high_val = ..." line, are pseudo
10211 statements that pass information to expand_omp_taskreg.
10212 low_val and high_val will be replaced by the __low and __high
10213 parameter from the child function.
10214
10215 The call_exprs part is a place-holder; it is mainly used
10216 to distinctly identify to the top-level part that this is
10217 where we should put low and high (reasoning given in header
10218 comment). */
10219
10220 tree child_fndecl
10221 = gimple_omp_parallel_child_fn (
10222 as_a <gomp_parallel *> (last_stmt (region->outer->entry)));
10223 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
10224 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
10225 {
10226 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
10227 high_val = t;
10228 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
10229 low_val = t;
10230 }
10231 gcc_assert (low_val && high_val);
10232
10233 tree type = TREE_TYPE (low_val);
10234 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
10235 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
10236
10237 /* Not needed in SSA form right now. */
10238 gcc_assert (!gimple_in_ssa_p (cfun));
10239 if (l2_dom_bb == NULL)
10240 l2_dom_bb = l1_bb;
10241
10242 tree n1 = low_val;
10243 tree n2 = high_val;
10244
10245 gimple *stmt = gimple_build_assign (ind_var, n1);
10246
10247 /* Replace the GIMPLE_OMP_FOR statement. */
10248 gsi_replace (&gsi, stmt, true);
10249
10250 if (!broken_loop)
10251 {
10252 /* Code to control the increment goes in the CONT_BB. */
10253 gsi = gsi_last_bb (cont_bb);
10254 stmt = gsi_stmt (gsi);
10255 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
10256 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
10257 build_one_cst (type));
10258
10259 /* Replace GIMPLE_OMP_CONTINUE. */
10260 gsi_replace (&gsi, stmt, true);
10261 }
10262
10263 /* Emit the condition in L1_BB. */
10264 gsi = gsi_after_labels (l1_bb);
10265 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
10266 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
10267 fd->loop.step);
10268 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
10269 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
10270 fd->loop.n1, fold_convert (sizetype, t));
10271 else
10272 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
10273 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
10274 t = fold_convert (TREE_TYPE (fd->loop.v), t);
10275 expand_omp_build_assign (&gsi, fd->loop.v, t);
10276
10277 /* The condition is always '<' since the runtime will fill in the low
10278 and high values. */
10279 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
10280 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
10281
10282 /* Remove GIMPLE_OMP_RETURN. */
10283 gsi = gsi_last_bb (exit_bb);
10284 gsi_remove (&gsi, true);
10285
10286 /* Connect the new blocks. */
10287 remove_edge (FALLTHRU_EDGE (entry_bb));
10288
10289 edge e, ne;
10290 if (!broken_loop)
10291 {
10292 remove_edge (BRANCH_EDGE (entry_bb));
10293 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
10294
10295 e = BRANCH_EDGE (l1_bb);
10296 ne = FALLTHRU_EDGE (l1_bb);
10297 e->flags = EDGE_TRUE_VALUE;
10298 }
10299 else
10300 {
10301 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
10302
10303 ne = single_succ_edge (l1_bb);
10304 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
10305
10306 }
10307 ne->flags = EDGE_FALSE_VALUE;
10308 e->probability = REG_BR_PROB_BASE * 7 / 8;
10309 ne->probability = REG_BR_PROB_BASE / 8;
10310
10311 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
10312 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
10313 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
10314
10315 if (!broken_loop)
10316 {
10317 struct loop *loop = alloc_loop ();
10318 loop->header = l1_bb;
10319 loop->latch = cont_bb;
10320 add_loop (loop, l1_bb->loop_father);
10321 loop->safelen = INT_MAX;
10322 }
10323
10324 /* Pick the correct library function based on the precision of the
10325 induction variable type. */
10326 tree lib_fun = NULL_TREE;
10327 if (TYPE_PRECISION (type) == 32)
10328 lib_fun = cilk_for_32_fndecl;
10329 else if (TYPE_PRECISION (type) == 64)
10330 lib_fun = cilk_for_64_fndecl;
10331 else
10332 gcc_unreachable ();
10333
10334 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
10335
10336 /* WS_ARGS contains the library function flavor to call:
10337 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32, and the
10338 user-defined grain value. If the user does not define one, then zero
10339 is passed in by the parser. */
10340 vec_alloc (region->ws_args, 2);
10341 region->ws_args->quick_push (lib_fun);
10342 region->ws_args->quick_push (fd->chunk_size);
10343 }
10344
10345 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
10346 loop. Given parameters:
10347
10348 for (V = N1; V cond N2; V += STEP) BODY;
10349
10350 where COND is "<" or ">", we generate pseudocode
10351
10352 V = N1;
10353 goto L1;
10354 L0:
10355 BODY;
10356 V += STEP;
10357 L1:
10358 if (V cond N2) goto L0; else goto L2;
10359 L2:
10360
10361 For collapsed loops, given parameters:
10362 collapse(3)
10363 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
10364 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
10365 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
10366 BODY;
10367
10368 we generate pseudocode
10369
10370 if (cond3 is <)
10371 adj = STEP3 - 1;
10372 else
10373 adj = STEP3 + 1;
10374 count3 = (adj + N32 - N31) / STEP3;
10375 if (cond2 is <)
10376 adj = STEP2 - 1;
10377 else
10378 adj = STEP2 + 1;
10379 count2 = (adj + N22 - N21) / STEP2;
10380 if (cond1 is <)
10381 adj = STEP1 - 1;
10382 else
10383 adj = STEP1 + 1;
10384 count1 = (adj + N12 - N11) / STEP1;
10385 count = count1 * count2 * count3;
10386 V = 0;
10387 V1 = N11;
10388 V2 = N21;
10389 V3 = N31;
10390 goto L1;
10391 L0:
10392 BODY;
10393 V += 1;
10394 V3 += STEP3;
10395 V2 += (V3 cond3 N32) ? 0 : STEP2;
10396 V3 = (V3 cond3 N32) ? V3 : N31;
10397 V1 += (V2 cond2 N22) ? 0 : STEP1;
10398 V2 = (V2 cond2 N22) ? V2 : N21;
10399 L1:
10400 if (V < count) goto L0; else goto L2;
10401 L2:
10402
10403 */
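/* For example, for collapse(2) with N11 = 0, N12 = 3, STEP1 = 1 and
   N21 = 0, N22 = 2, STEP2 = 1 we get count1 = 3, count2 = 2 and
   count = 6; the single counter V runs from 0 to 5 while (V1, V2)
   visits (0,0) (0,1) (1,0) (1,1) (2,0) (2,1), V2 wrapping back to N21
   whenever it reaches N22 and V1 advancing exactly on those wraps.  */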
10404
10405 static void
10406 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
10407 {
10408 tree type, t;
10409 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
10410 gimple_stmt_iterator gsi;
10411 gimple *stmt;
10412 gcond *cond_stmt;
10413 bool broken_loop = region->cont == NULL;
10414 edge e, ne;
10415 tree *counts = NULL;
10416 int i;
10417 tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10418 OMP_CLAUSE_SAFELEN);
10419 tree simduid = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10420 OMP_CLAUSE__SIMDUID_);
10421 tree n1, n2;
10422
10423 type = TREE_TYPE (fd->loop.v);
10424 entry_bb = region->entry;
10425 cont_bb = region->cont;
10426 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
10427 gcc_assert (broken_loop
10428 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
10429 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
10430 if (!broken_loop)
10431 {
10432 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
10433 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
10434 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
10435 l2_bb = BRANCH_EDGE (entry_bb)->dest;
10436 }
10437 else
10438 {
10439 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
10440 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
10441 l2_bb = single_succ (l1_bb);
10442 }
10443 exit_bb = region->exit;
10444 l2_dom_bb = NULL;
10445
10446 gsi = gsi_last_bb (entry_bb);
10447
10448 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
10449 /* Not needed in SSA form right now. */
10450 gcc_assert (!gimple_in_ssa_p (cfun));
10451 if (fd->collapse > 1)
10452 {
10453 int first_zero_iter = -1, dummy = -1;
10454 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
10455
10456 counts = XALLOCAVEC (tree, fd->collapse);
10457 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
10458 zero_iter_bb, first_zero_iter,
10459 dummy_bb, dummy, l2_dom_bb);
10460 }
10461 if (l2_dom_bb == NULL)
10462 l2_dom_bb = l1_bb;
10463
10464 n1 = fd->loop.n1;
10465 n2 = fd->loop.n2;
10466 if (gimple_omp_for_combined_into_p (fd->for_stmt))
10467 {
10468 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10469 OMP_CLAUSE__LOOPTEMP_);
10470 gcc_assert (innerc);
10471 n1 = OMP_CLAUSE_DECL (innerc);
10472 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10473 OMP_CLAUSE__LOOPTEMP_);
10474 gcc_assert (innerc);
10475 n2 = OMP_CLAUSE_DECL (innerc);
10476 expand_omp_build_assign (&gsi, fd->loop.v,
10477 fold_convert (type, n1));
10478 if (fd->collapse > 1)
10479 {
10480 gsi_prev (&gsi);
10481 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
10482 gsi_next (&gsi);
10483 }
10484 }
10485 else
10486 {
10487 expand_omp_build_assign (&gsi, fd->loop.v,
10488 fold_convert (type, fd->loop.n1));
10489 if (fd->collapse > 1)
10490 for (i = 0; i < fd->collapse; i++)
10491 {
10492 tree itype = TREE_TYPE (fd->loops[i].v);
10493 if (POINTER_TYPE_P (itype))
10494 itype = signed_type_for (itype);
10495 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
10496 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
10497 }
10498 }
10499
10500 /* Remove the GIMPLE_OMP_FOR statement. */
10501 gsi_remove (&gsi, true);
10502
10503 if (!broken_loop)
10504 {
10505 /* Code to control the increment goes in the CONT_BB. */
10506 gsi = gsi_last_bb (cont_bb);
10507 stmt = gsi_stmt (gsi);
10508 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
10509
10510 if (POINTER_TYPE_P (type))
10511 t = fold_build_pointer_plus (fd->loop.v, fd->loop.step);
10512 else
10513 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, fd->loop.step);
10514 expand_omp_build_assign (&gsi, fd->loop.v, t);
10515
10516 if (fd->collapse > 1)
10517 {
10518 i = fd->collapse - 1;
10519 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
10520 {
10521 t = fold_convert (sizetype, fd->loops[i].step);
10522 t = fold_build_pointer_plus (fd->loops[i].v, t);
10523 }
10524 else
10525 {
10526 t = fold_convert (TREE_TYPE (fd->loops[i].v),
10527 fd->loops[i].step);
10528 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
10529 fd->loops[i].v, t);
10530 }
10531 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
10532
10533 for (i = fd->collapse - 1; i > 0; i--)
10534 {
10535 tree itype = TREE_TYPE (fd->loops[i].v);
10536 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
10537 if (POINTER_TYPE_P (itype2))
10538 itype2 = signed_type_for (itype2);
10539 t = build3 (COND_EXPR, itype2,
10540 build2 (fd->loops[i].cond_code, boolean_type_node,
10541 fd->loops[i].v,
10542 fold_convert (itype, fd->loops[i].n2)),
10543 build_int_cst (itype2, 0),
10544 fold_convert (itype2, fd->loops[i - 1].step));
10545 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
10546 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
10547 else
10548 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
10549 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
10550
10551 t = build3 (COND_EXPR, itype,
10552 build2 (fd->loops[i].cond_code, boolean_type_node,
10553 fd->loops[i].v,
10554 fold_convert (itype, fd->loops[i].n2)),
10555 fd->loops[i].v,
10556 fold_convert (itype, fd->loops[i].n1));
10557 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
10558 }
10559 }
10560
10561 /* Remove GIMPLE_OMP_CONTINUE. */
10562 gsi_remove (&gsi, true);
10563 }
10564
10565 /* Emit the condition in L1_BB. */
10566 gsi = gsi_start_bb (l1_bb);
10567
10568 t = fold_convert (type, n2);
10569 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10570 false, GSI_CONTINUE_LINKING);
10571 tree v = fd->loop.v;
10572 if (DECL_P (v) && TREE_ADDRESSABLE (v))
10573 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
10574 false, GSI_CONTINUE_LINKING);
10575 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
10576 cond_stmt = gimple_build_cond_empty (t);
10577 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
10578 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
10579 NULL, NULL)
10580 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
10581 NULL, NULL))
10582 {
10583 gsi = gsi_for_stmt (cond_stmt);
10584 gimple_regimplify_operands (cond_stmt, &gsi);
10585 }
10586
10587 /* Remove GIMPLE_OMP_RETURN. */
10588 gsi = gsi_last_bb (exit_bb);
10589 gsi_remove (&gsi, true);
10590
10591 /* Connect the new blocks. */
10592 remove_edge (FALLTHRU_EDGE (entry_bb));
10593
10594 if (!broken_loop)
10595 {
10596 remove_edge (BRANCH_EDGE (entry_bb));
10597 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
10598
10599 e = BRANCH_EDGE (l1_bb);
10600 ne = FALLTHRU_EDGE (l1_bb);
10601 e->flags = EDGE_TRUE_VALUE;
10602 }
10603 else
10604 {
10605 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
10606
10607 ne = single_succ_edge (l1_bb);
10608 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
10609
10610 }
10611 ne->flags = EDGE_FALSE_VALUE;
10612 e->probability = REG_BR_PROB_BASE * 7 / 8;
10613 ne->probability = REG_BR_PROB_BASE / 8;
10614
10615 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
10616 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
10617 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
10618
10619 if (!broken_loop)
10620 {
10621 struct loop *loop = alloc_loop ();
10622 loop->header = l1_bb;
10623 loop->latch = cont_bb;
10624 add_loop (loop, l1_bb->loop_father);
10625 if (safelen == NULL_TREE)
10626 loop->safelen = INT_MAX;
10627 else
10628 {
10629 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
10630 if (TREE_CODE (safelen) != INTEGER_CST)
10631 loop->safelen = 0;
10632 else if (!tree_fits_uhwi_p (safelen)
10633 || tree_to_uhwi (safelen) > INT_MAX)
10634 loop->safelen = INT_MAX;
10635 else
10636 loop->safelen = tree_to_uhwi (safelen);
10637 if (loop->safelen == 1)
10638 loop->safelen = 0;
10639 }
10640 if (simduid)
10641 {
10642 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
10643 cfun->has_simduid_loops = true;
10644 }
10645 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
10646 the loop. */
10647 if ((flag_tree_loop_vectorize
10648 || (!global_options_set.x_flag_tree_loop_vectorize
10649 && !global_options_set.x_flag_tree_vectorize))
10650 && flag_tree_loop_optimize
10651 && loop->safelen > 1)
10652 {
10653 loop->force_vectorize = true;
10654 cfun->has_force_vectorize_loops = true;
10655 }
10656 }
10657 else if (simduid)
10658 cfun->has_simduid_loops = true;
10659 }
10660
10661 /* Taskloop construct is represented after gimplification with
10662 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
10663 in between them. This routine expands the outer GIMPLE_OMP_FOR,
10664 which should just compute all the needed loop temporaries
10665 for GIMPLE_OMP_TASK. */
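/* Illustrative sketch added for clarity (names and shapes are approximate,
   not verbatim compiler output): for a source construct such as

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body (i);

   gimplification produces roughly

     GIMPLE_OMP_FOR        <- outer; expanded here, only fills in the
       GIMPLE_OMP_TASK        _looptemp_ start/end temporaries
	 GIMPLE_OMP_FOR    <- inner; expanded by expand_omp_taskloop_for_inner
	   body (i);

   The GOMP_taskloop{,_ull} runtime call later hands each created task a
   single [start, end) subrange through those temporaries.  */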
10666
10667 static void
10668 expand_omp_taskloop_for_outer (struct omp_region *region,
10669 struct omp_for_data *fd,
10670 gimple *inner_stmt)
10671 {
10672 tree type, bias = NULL_TREE;
10673 basic_block entry_bb, cont_bb, exit_bb;
10674 gimple_stmt_iterator gsi;
10675 gassign *assign_stmt;
10676 tree *counts = NULL;
10677 int i;
10678
10679 gcc_assert (inner_stmt);
10680 gcc_assert (region->cont);
10681 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
10682 && gimple_omp_task_taskloop_p (inner_stmt));
10683 type = TREE_TYPE (fd->loop.v);
10684
10685 /* See if we need to bias by LLONG_MIN. */
10686 if (fd->iter_type == long_long_unsigned_type_node
10687 && TREE_CODE (type) == INTEGER_TYPE
10688 && !TYPE_UNSIGNED (type))
10689 {
10690 tree n1, n2;
10691
10692 if (fd->loop.cond_code == LT_EXPR)
10693 {
10694 n1 = fd->loop.n1;
10695 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
10696 }
10697 else
10698 {
10699 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
10700 n2 = fd->loop.n1;
10701 }
10702 if (TREE_CODE (n1) != INTEGER_CST
10703 || TREE_CODE (n2) != INTEGER_CST
10704 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
10705 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
10706 }
10707
10708 entry_bb = region->entry;
10709 cont_bb = region->cont;
10710 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
10711 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
10712 exit_bb = region->exit;
10713
10714 gsi = gsi_last_bb (entry_bb);
10715 gimple *for_stmt = gsi_stmt (gsi);
10716 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
10717 if (fd->collapse > 1)
10718 {
10719 int first_zero_iter = -1, dummy = -1;
10720 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
10721
10722 counts = XALLOCAVEC (tree, fd->collapse);
10723 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
10724 zero_iter_bb, first_zero_iter,
10725 dummy_bb, dummy, l2_dom_bb);
10726
10727 if (zero_iter_bb)
10728 {
10729 /* Some counts[i] vars might be uninitialized if
10730 some loop has zero iterations. But the body shouldn't
10731 be executed in that case, so just avoid uninit warnings. */
10732 for (i = first_zero_iter; i < fd->collapse; i++)
10733 if (SSA_VAR_P (counts[i]))
10734 TREE_NO_WARNING (counts[i]) = 1;
10735 gsi_prev (&gsi);
10736 edge e = split_block (entry_bb, gsi_stmt (gsi));
10737 entry_bb = e->dest;
10738 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
10739 gsi = gsi_last_bb (entry_bb);
10740 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
10741 get_immediate_dominator (CDI_DOMINATORS,
10742 zero_iter_bb));
10743 }
10744 }
10745
10746 tree t0, t1;
10747 t1 = fd->loop.n2;
10748 t0 = fd->loop.n1;
10749 if (POINTER_TYPE_P (TREE_TYPE (t0))
10750 && TYPE_PRECISION (TREE_TYPE (t0))
10751 != TYPE_PRECISION (fd->iter_type))
10752 {
10753 /* Avoid casting pointers to integer of a different size. */
10754 tree itype = signed_type_for (type);
10755 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
10756 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
10757 }
10758 else
10759 {
10760 t1 = fold_convert (fd->iter_type, t1);
10761 t0 = fold_convert (fd->iter_type, t0);
10762 }
10763 if (bias)
10764 {
10765 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
10766 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
10767 }
10768
10769 tree innerc = find_omp_clause (gimple_omp_task_clauses (inner_stmt),
10770 OMP_CLAUSE__LOOPTEMP_);
10771 gcc_assert (innerc);
10772 tree startvar = OMP_CLAUSE_DECL (innerc);
10773 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
10774 gcc_assert (innerc);
10775 tree endvar = OMP_CLAUSE_DECL (innerc);
10776 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
10777 {
10778 gcc_assert (innerc);
10779 for (i = 1; i < fd->collapse; i++)
10780 {
10781 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10782 OMP_CLAUSE__LOOPTEMP_);
10783 gcc_assert (innerc);
10784 }
10785 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10786 OMP_CLAUSE__LOOPTEMP_);
10787 if (innerc)
10788 {
10789 /* If needed (inner taskloop has lastprivate clause), propagate
10790 down the total number of iterations. */
10791 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
10792 NULL_TREE, false,
10793 GSI_CONTINUE_LINKING);
10794 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
10795 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10796 }
10797 }
10798
10799 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
10800 GSI_CONTINUE_LINKING);
10801 assign_stmt = gimple_build_assign (startvar, t0);
10802 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10803
10804 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
10805 GSI_CONTINUE_LINKING);
10806 assign_stmt = gimple_build_assign (endvar, t1);
10807 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10808 if (fd->collapse > 1)
10809 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
10810
10811 /* Remove the GIMPLE_OMP_FOR statement. */
10812 gsi = gsi_for_stmt (for_stmt);
10813 gsi_remove (&gsi, true);
10814
10815 gsi = gsi_last_bb (cont_bb);
10816 gsi_remove (&gsi, true);
10817
10818 gsi = gsi_last_bb (exit_bb);
10819 gsi_remove (&gsi, true);
10820
10821 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
10822 remove_edge (BRANCH_EDGE (entry_bb));
10823 FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
10824 remove_edge (BRANCH_EDGE (cont_bb));
10825 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
10826 set_immediate_dominator (CDI_DOMINATORS, region->entry,
10827 recompute_dominator (CDI_DOMINATORS, region->entry));
10828 }
10829
10830 /* Taskloop construct is represented after gimplification with
10831 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
10832 in between them. This routine expands the inner GIMPLE_OMP_FOR.
10833 GOMP_taskloop{,_ull} function arranges for each task to be given just
10834 a single range of iterations. */
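/* Illustrative sketch added for clarity (assuming a simple non-combined
   taskloop body; not verbatim output): given the [start, end) range the
   runtime passed in via the two _looptemp_ clauses, the inner loop expands
   to an ordinary sequential loop, roughly

     V = start;
     while (V cond end)
       {
	 body;
	 V += step;
       }

   with no additional libgomp calls generated here.  */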
10835
10836 static void
10837 expand_omp_taskloop_for_inner (struct omp_region *region,
10838 struct omp_for_data *fd,
10839 gimple *inner_stmt)
10840 {
10841 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
10842 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
10843 basic_block fin_bb;
10844 gimple_stmt_iterator gsi;
10845 edge ep;
10846 bool broken_loop = region->cont == NULL;
10847 tree *counts = NULL;
10848 tree n1, n2, step;
10849
10850 itype = type = TREE_TYPE (fd->loop.v);
10851 if (POINTER_TYPE_P (type))
10852 itype = signed_type_for (type);
10853
10854 /* See if we need to bias by LLONG_MIN. */
10855 if (fd->iter_type == long_long_unsigned_type_node
10856 && TREE_CODE (type) == INTEGER_TYPE
10857 && !TYPE_UNSIGNED (type))
10858 {
10859 tree n1, n2;
10860
10861 if (fd->loop.cond_code == LT_EXPR)
10862 {
10863 n1 = fd->loop.n1;
10864 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
10865 }
10866 else
10867 {
10868 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
10869 n2 = fd->loop.n1;
10870 }
10871 if (TREE_CODE (n1) != INTEGER_CST
10872 || TREE_CODE (n2) != INTEGER_CST
10873 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
10874 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
10875 }
10876
10877 entry_bb = region->entry;
10878 cont_bb = region->cont;
10879 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
10880 fin_bb = BRANCH_EDGE (entry_bb)->dest;
10881 gcc_assert (broken_loop
10882 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
10883 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
10884 if (!broken_loop)
10885 {
10886 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
10887 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
10888 }
10889 exit_bb = region->exit;
10890
10891 /* Iteration space partitioning goes in ENTRY_BB. */
10892 gsi = gsi_last_bb (entry_bb);
10893 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
10894
10895 if (fd->collapse > 1)
10896 {
10897 int first_zero_iter = -1, dummy = -1;
10898 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
10899
10900 counts = XALLOCAVEC (tree, fd->collapse);
10901 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
10902 fin_bb, first_zero_iter,
10903 dummy_bb, dummy, l2_dom_bb);
10904 t = NULL_TREE;
10905 }
10906 else
10907 t = integer_one_node;
10908
10909 step = fd->loop.step;
10910 tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt),
10911 OMP_CLAUSE__LOOPTEMP_);
10912 gcc_assert (innerc);
10913 n1 = OMP_CLAUSE_DECL (innerc);
10914 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
10915 gcc_assert (innerc);
10916 n2 = OMP_CLAUSE_DECL (innerc);
10917 if (bias)
10918 {
10919 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
10920 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
10921 }
10922 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
10923 true, NULL_TREE, true, GSI_SAME_STMT);
10924 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
10925 true, NULL_TREE, true, GSI_SAME_STMT);
10926 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
10927 true, NULL_TREE, true, GSI_SAME_STMT);
10928
10929 tree startvar = fd->loop.v;
10930 tree endvar = NULL_TREE;
10931
10932 if (gimple_omp_for_combined_p (fd->for_stmt))
10933 {
10934 tree clauses = gimple_omp_for_clauses (inner_stmt);
10935 tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
10936 gcc_assert (innerc);
10937 startvar = OMP_CLAUSE_DECL (innerc);
10938 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
10939 OMP_CLAUSE__LOOPTEMP_);
10940 gcc_assert (innerc);
10941 endvar = OMP_CLAUSE_DECL (innerc);
10942 }
10943 t = fold_convert (TREE_TYPE (startvar), n1);
10944 t = force_gimple_operand_gsi (&gsi, t,
10945 DECL_P (startvar)
10946 && TREE_ADDRESSABLE (startvar),
10947 NULL_TREE, false, GSI_CONTINUE_LINKING);
10948 gimple *assign_stmt = gimple_build_assign (startvar, t);
10949 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10950
10951 t = fold_convert (TREE_TYPE (startvar), n2);
10952 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
10953 false, GSI_CONTINUE_LINKING);
10954 if (endvar)
10955 {
10956 assign_stmt = gimple_build_assign (endvar, e);
10957 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10958 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
10959 assign_stmt = gimple_build_assign (fd->loop.v, e);
10960 else
10961 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
10962 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
10963 }
10964 if (fd->collapse > 1)
10965 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
10966
10967 if (!broken_loop)
10968 {
10969 /* The code controlling the sequential loop replaces the
10970 GIMPLE_OMP_CONTINUE. */
10971 gsi = gsi_last_bb (cont_bb);
10972 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
10973 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
10974 vmain = gimple_omp_continue_control_use (cont_stmt);
10975 vback = gimple_omp_continue_control_def (cont_stmt);
10976
10977 if (!gimple_omp_for_combined_p (fd->for_stmt))
10978 {
10979 if (POINTER_TYPE_P (type))
10980 t = fold_build_pointer_plus (vmain, step);
10981 else
10982 t = fold_build2 (PLUS_EXPR, type, vmain, step);
10983 t = force_gimple_operand_gsi (&gsi, t,
10984 DECL_P (vback)
10985 && TREE_ADDRESSABLE (vback),
10986 NULL_TREE, true, GSI_SAME_STMT);
10987 assign_stmt = gimple_build_assign (vback, t);
10988 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
10989
10990 t = build2 (fd->loop.cond_code, boolean_type_node,
10991 DECL_P (vback) && TREE_ADDRESSABLE (vback)
10992 ? t : vback, e);
10993 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
10994 }
10995
10996 /* Remove the GIMPLE_OMP_CONTINUE statement. */
10997 gsi_remove (&gsi, true);
10998
10999 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
11000 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
11001 }
11002
11003 /* Remove the GIMPLE_OMP_FOR statement. */
11004 gsi = gsi_for_stmt (fd->for_stmt);
11005 gsi_remove (&gsi, true);
11006
11007 /* Remove the GIMPLE_OMP_RETURN statement. */
11008 gsi = gsi_last_bb (exit_bb);
11009 gsi_remove (&gsi, true);
11010
11011 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
11012 if (!broken_loop)
11013 remove_edge (BRANCH_EDGE (entry_bb));
11014 else
11015 {
11016 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
11017 region->outer->cont = NULL;
11018 }
11019
11020 /* Connect all the blocks. */
11021 if (!broken_loop)
11022 {
11023 ep = find_edge (cont_bb, body_bb);
11024 if (gimple_omp_for_combined_p (fd->for_stmt))
11025 {
11026 remove_edge (ep);
11027 ep = NULL;
11028 }
11029 else if (fd->collapse > 1)
11030 {
11031 remove_edge (ep);
11032 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
11033 }
11034 else
11035 ep->flags = EDGE_TRUE_VALUE;
11036 find_edge (cont_bb, fin_bb)->flags
11037 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
11038 }
11039
11040 set_immediate_dominator (CDI_DOMINATORS, body_bb,
11041 recompute_dominator (CDI_DOMINATORS, body_bb));
11042 if (!broken_loop)
11043 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
11044 recompute_dominator (CDI_DOMINATORS, fin_bb));
11045
11046 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
11047 {
11048 struct loop *loop = alloc_loop ();
11049 loop->header = body_bb;
11050 if (collapse_bb == NULL)
11051 loop->latch = cont_bb;
11052 add_loop (loop, body_bb->loop_father);
11053 }
11054 }
11055
11056 /* A subroutine of expand_omp_for. Generate code for an OpenACC
11057 partitioned loop. The lowering here is abstracted, in that the
11058 loop parameters are passed through internal functions, which are
11059 further lowered by oacc_device_lower, once we get to the target
11060 compiler. The loop is of the form:
11061
11062 for (V = B; V LTGT E; V += S) {BODY}
11063
11064 where LTGT is < or >. We may have a specified chunking size, CHUNKING
11065 (constant 0 for no chunking) and we will have a GWV partitioning
11066 mask, specifying dimensions over which the loop is to be
11067 partitioned (see note below). We generate code that looks like:
11068
11069 <entry_bb> [incoming FALL->body, BRANCH->exit]
11070 typedef signedintify (typeof (V)) T; // underlying signed integral type
11071 T range = E - B;
11072 T chunk_no = 0;
11073 T DIR = LTGT == '<' ? +1 : -1;
11074 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
11075 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
11076
11077 <head_bb> [created by splitting end of entry_bb]
11078 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
11079 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
11080 if (!(offset LTGT bound)) goto bottom_bb;
11081
11082 <body_bb> [incoming]
11083 V = B + offset;
11084 {BODY}
11085
11086 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
11087 offset += step;
11088 if (offset LTGT bound) goto body_bb; [*]
11089
11090 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
11091 chunk_no++;
11092 	if (chunk_no < chunk_max) goto head_bb;
11093
11094 <exit_bb> [incoming]
11095 V = B + ((range -/+ 1) / S +/- 1) * S [*]
11096
11097 [*] Needed if V live at end of loop
11098
11099 Note: CHUNKING & GWV mask are specified explicitly here. This is a
11100 transition, and will be specified by a more general mechanism shortly.
11101 */
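/* For reference (added; dump-style notation, not verbatim output): the
   pseudo-calls above correspond to the IFN_GOACC_LOOP internal-function
   calls built below, roughly

     chunk_max = .GOACC_LOOP (IFN_GOACC_LOOP_CHUNKS, dir, range, S, CHUNK_SIZE, GWV);
     step      = .GOACC_LOOP (IFN_GOACC_LOOP_STEP,   dir, range, S, CHUNK_SIZE, GWV);
     offset    = .GOACC_LOOP (IFN_GOACC_LOOP_OFFSET, dir, range, S, CHUNK_SIZE, GWV, chunk_no);
     bound     = .GOACC_LOOP (IFN_GOACC_LOOP_BOUND,  dir, range, S, CHUNK_SIZE, GWV, offset);

   which remain opaque until oacc_device_lower resolves them for the target.  */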
11102
11103 static void
11104 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
11105 {
11106 tree v = fd->loop.v;
11107 enum tree_code cond_code = fd->loop.cond_code;
11108 enum tree_code plus_code = PLUS_EXPR;
11109
11110 tree chunk_size = integer_minus_one_node;
11111 tree gwv = integer_zero_node;
11112 tree iter_type = TREE_TYPE (v);
11113 tree diff_type = iter_type;
11114 tree plus_type = iter_type;
11115 struct oacc_collapse *counts = NULL;
11116
11117 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
11118 == GF_OMP_FOR_KIND_OACC_LOOP);
11119 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
11120 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
11121
11122 if (POINTER_TYPE_P (iter_type))
11123 {
11124 plus_code = POINTER_PLUS_EXPR;
11125 plus_type = sizetype;
11126 }
11127 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
11128 diff_type = signed_type_for (diff_type);
11129
11130 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
11131 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
11132 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
11133 basic_block bottom_bb = NULL;
11134
11135   /* entry_bb has two successors; the branch edge is to the exit
11136      block, the fallthrough edge to the body.  */
11137 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
11138 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
11139
11140   /* If cont_bb is non-NULL, it has two successors.  The branch successor
11141      goes to body_bb, or to a block whose only successor is body_bb.  Its
11142      fallthrough successor is the final block (the same as the branch
11143      successor of entry_bb).  */
11144 if (cont_bb)
11145 {
11146 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
11147 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
11148
11149 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
11150 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
11151 }
11152 else
11153 gcc_assert (!gimple_in_ssa_p (cfun));
11154
11155 /* The exit block only has entry_bb and cont_bb as predecessors. */
11156 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
11157
11158 tree chunk_no;
11159 tree chunk_max = NULL_TREE;
11160 tree bound, offset;
11161 tree step = create_tmp_var (diff_type, ".step");
11162 bool up = cond_code == LT_EXPR;
11163 tree dir = build_int_cst (diff_type, up ? +1 : -1);
11164   bool chunking = !gimple_in_ssa_p (cfun);
11165 bool negating;
11166
11167 /* SSA instances. */
11168 tree offset_incr = NULL_TREE;
11169 tree offset_init = NULL_TREE;
11170
11171 gimple_stmt_iterator gsi;
11172 gassign *ass;
11173 gcall *call;
11174 gimple *stmt;
11175 tree expr;
11176 location_t loc;
11177 edge split, be, fte;
11178
11179 /* Split the end of entry_bb to create head_bb. */
11180 split = split_block (entry_bb, last_stmt (entry_bb));
11181 basic_block head_bb = split->dest;
11182 entry_bb = split->src;
11183
11184 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
11185 gsi = gsi_last_bb (entry_bb);
11186 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
11187 loc = gimple_location (for_stmt);
11188
11189 if (gimple_in_ssa_p (cfun))
11190 {
11191 offset_init = gimple_omp_for_index (for_stmt, 0);
11192 gcc_assert (integer_zerop (fd->loop.n1));
11193 /* The SSA parallelizer does gang parallelism. */
11194 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
11195 }
11196
11197 if (fd->collapse > 1)
11198 {
11199 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
11200 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
11201 TREE_TYPE (fd->loop.n2));
11202
11203 if (SSA_VAR_P (fd->loop.n2))
11204 {
11205 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
11206 true, GSI_SAME_STMT);
11207 ass = gimple_build_assign (fd->loop.n2, total);
11208 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11209 }
11210
11211 }
11212
11213 tree b = fd->loop.n1;
11214 tree e = fd->loop.n2;
11215 tree s = fd->loop.step;
11216
11217 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
11218 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
11219
11220 /* Convert the step, avoiding possible unsigned->signed overflow. */
11221 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
11222 if (negating)
11223 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
11224 s = fold_convert (diff_type, s);
11225 if (negating)
11226 s = fold_build1 (NEGATE_EXPR, diff_type, s);
11227 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
11228
11229 if (!chunking)
11230 chunk_size = integer_zero_node;
11231 expr = fold_convert (diff_type, chunk_size);
11232 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
11233 NULL_TREE, true, GSI_SAME_STMT);
11234 /* Determine the range, avoiding possible unsigned->signed overflow. */
11235 negating = !up && TYPE_UNSIGNED (iter_type);
11236 expr = fold_build2 (MINUS_EXPR, plus_type,
11237 fold_convert (plus_type, negating ? b : e),
11238 fold_convert (plus_type, negating ? e : b));
11239 expr = fold_convert (diff_type, expr);
11240 if (negating)
11241 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
11242 tree range = force_gimple_operand_gsi (&gsi, expr, true,
11243 NULL_TREE, true, GSI_SAME_STMT);
11244
11245 chunk_no = build_int_cst (diff_type, 0);
11246 if (chunking)
11247 {
11248 gcc_assert (!gimple_in_ssa_p (cfun));
11249
11250 expr = chunk_no;
11251 chunk_max = create_tmp_var (diff_type, ".chunk_max");
11252 chunk_no = create_tmp_var (diff_type, ".chunk_no");
11253
11254 ass = gimple_build_assign (chunk_no, expr);
11255 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11256
11257 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
11258 build_int_cst (integer_type_node,
11259 IFN_GOACC_LOOP_CHUNKS),
11260 dir, range, s, chunk_size, gwv);
11261 gimple_call_set_lhs (call, chunk_max);
11262 gimple_set_location (call, loc);
11263 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
11264 }
11265 else
11266 chunk_size = chunk_no;
11267
11268 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
11269 build_int_cst (integer_type_node,
11270 IFN_GOACC_LOOP_STEP),
11271 dir, range, s, chunk_size, gwv);
11272 gimple_call_set_lhs (call, step);
11273 gimple_set_location (call, loc);
11274 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
11275
11276 /* Remove the GIMPLE_OMP_FOR. */
11277 gsi_remove (&gsi, true);
11278
11279 /* Fixup edges from head_bb */
11280 be = BRANCH_EDGE (head_bb);
11281 fte = FALLTHRU_EDGE (head_bb);
11282 be->flags |= EDGE_FALSE_VALUE;
11283 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
11284
11285 basic_block body_bb = fte->dest;
11286
11287 if (gimple_in_ssa_p (cfun))
11288 {
11289 gsi = gsi_last_bb (cont_bb);
11290 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
11291
11292 offset = gimple_omp_continue_control_use (cont_stmt);
11293 offset_incr = gimple_omp_continue_control_def (cont_stmt);
11294 }
11295 else
11296 {
11297 offset = create_tmp_var (diff_type, ".offset");
11298 offset_init = offset_incr = offset;
11299 }
11300 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
11301
11302 /* Loop offset & bound go into head_bb. */
11303 gsi = gsi_start_bb (head_bb);
11304
11305 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
11306 build_int_cst (integer_type_node,
11307 IFN_GOACC_LOOP_OFFSET),
11308 dir, range, s,
11309 chunk_size, gwv, chunk_no);
11310 gimple_call_set_lhs (call, offset_init);
11311 gimple_set_location (call, loc);
11312 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
11313
11314 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
11315 build_int_cst (integer_type_node,
11316 IFN_GOACC_LOOP_BOUND),
11317 dir, range, s,
11318 chunk_size, gwv, offset_init);
11319 gimple_call_set_lhs (call, bound);
11320 gimple_set_location (call, loc);
11321 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
11322
11323 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
11324 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
11325 GSI_CONTINUE_LINKING);
11326
11327 /* V assignment goes into body_bb. */
11328 if (!gimple_in_ssa_p (cfun))
11329 {
11330 gsi = gsi_start_bb (body_bb);
11331
11332 expr = build2 (plus_code, iter_type, b,
11333 fold_convert (plus_type, offset));
11334 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
11335 true, GSI_SAME_STMT);
11336 ass = gimple_build_assign (v, expr);
11337 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11338 if (fd->collapse > 1)
11339 expand_oacc_collapse_vars (fd, &gsi, counts, v);
11340 }
11341
11342 /* Loop increment goes into cont_bb. If this is not a loop, we
11343 will have spawned threads as if it was, and each one will
11344 execute one iteration. The specification is not explicit about
11345 whether such constructs are ill-formed or not, and they can
11346 occur, especially when noreturn routines are involved. */
11347 if (cont_bb)
11348 {
11349 gsi = gsi_last_bb (cont_bb);
11350 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
11351 loc = gimple_location (cont_stmt);
11352
11353 /* Increment offset. */
11354 if (gimple_in_ssa_p (cfun))
11355 	expr = build2 (plus_code, iter_type, offset,
11356 fold_convert (plus_type, step));
11357 else
11358 expr = build2 (PLUS_EXPR, diff_type, offset, step);
11359 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
11360 true, GSI_SAME_STMT);
11361 ass = gimple_build_assign (offset_incr, expr);
11362 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11363 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
11364 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
11365
11366 /* Remove the GIMPLE_OMP_CONTINUE. */
11367 gsi_remove (&gsi, true);
11368
11369 /* Fixup edges from cont_bb */
11370 be = BRANCH_EDGE (cont_bb);
11371 fte = FALLTHRU_EDGE (cont_bb);
11372 be->flags |= EDGE_TRUE_VALUE;
11373 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
11374
11375 if (chunking)
11376 {
11377 /* Split the beginning of exit_bb to make bottom_bb. We
11378 need to insert a nop at the start, because splitting is
11379 after a stmt, not before. */
11380 gsi = gsi_start_bb (exit_bb);
11381 stmt = gimple_build_nop ();
11382 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
11383 split = split_block (exit_bb, stmt);
11384 bottom_bb = split->src;
11385 exit_bb = split->dest;
11386 gsi = gsi_last_bb (bottom_bb);
11387
11388 /* Chunk increment and test goes into bottom_bb. */
11389 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
11390 build_int_cst (diff_type, 1));
11391 ass = gimple_build_assign (chunk_no, expr);
11392 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
11393
11394 /* Chunk test at end of bottom_bb. */
11395 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
11396 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
11397 GSI_CONTINUE_LINKING);
11398
11399 /* Fixup edges from bottom_bb. */
11400 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
11401 make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
11402 }
11403 }
11404
11405 gsi = gsi_last_bb (exit_bb);
11406 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
11407 loc = gimple_location (gsi_stmt (gsi));
11408
11409 if (!gimple_in_ssa_p (cfun))
11410 {
11411 /* Insert the final value of V, in case it is live. This is the
11412 value for the only thread that survives past the join. */
11413 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
11414 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
11415 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
11416 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
11417 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
11418 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
11419 true, GSI_SAME_STMT);
11420 ass = gimple_build_assign (v, expr);
11421 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
11422 }
11423
11424 /* Remove the OMP_RETURN. */
11425 gsi_remove (&gsi, true);
11426
11427 if (cont_bb)
11428 {
11429 /* We now have one or two nested loops. Update the loop
11430 structures. */
11431 struct loop *parent = entry_bb->loop_father;
11432 struct loop *body = body_bb->loop_father;
11433
11434 if (chunking)
11435 {
11436 struct loop *chunk_loop = alloc_loop ();
11437 chunk_loop->header = head_bb;
11438 chunk_loop->latch = bottom_bb;
11439 add_loop (chunk_loop, parent);
11440 parent = chunk_loop;
11441 }
11442 else if (parent != body)
11443 {
11444 gcc_assert (body->header == body_bb);
11445 gcc_assert (body->latch == cont_bb
11446 || single_pred (body->latch) == cont_bb);
11447 parent = NULL;
11448 }
11449
11450 if (parent)
11451 {
11452 struct loop *body_loop = alloc_loop ();
11453 body_loop->header = body_bb;
11454 body_loop->latch = cont_bb;
11455 add_loop (body_loop, parent);
11456 }
11457 }
11458 }
11459
11460 /* Expand the OMP loop defined by REGION. */
11461
11462 static void
11463 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
11464 {
11465 struct omp_for_data fd;
11466 struct omp_for_data_loop *loops;
11467
11468 loops
11469 = (struct omp_for_data_loop *)
11470 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
11471 * sizeof (struct omp_for_data_loop));
11472 extract_omp_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
11473 &fd, loops);
11474 region->sched_kind = fd.sched_kind;
11475 region->sched_modifiers = fd.sched_modifiers;
11476
11477 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
11478 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
11479 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
11480 if (region->cont)
11481 {
11482 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
11483 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
11484 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
11485 }
11486 else
11487     /* If there isn't a continue, then this is a degenerate case where
11488        the introduction of abnormal edges during lowering will prevent
11489        original loops from being detected.  Fix that up.  */
11490 loops_state_set (LOOPS_NEED_FIXUP);
11491
11492 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
11493 expand_omp_simd (region, &fd);
11494 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
11495 expand_cilk_for (region, &fd);
11496 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
11497 {
11498 gcc_assert (!inner_stmt);
11499 expand_oacc_for (region, &fd);
11500 }
11501 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
11502 {
11503 if (gimple_omp_for_combined_into_p (fd.for_stmt))
11504 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
11505 else
11506 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
11507 }
11508 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
11509 && !fd.have_ordered)
11510 {
11511 if (fd.chunk_size == NULL)
11512 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
11513 else
11514 expand_omp_for_static_chunk (region, &fd, inner_stmt);
11515 }
11516 else
11517 {
11518 int fn_index, start_ix, next_ix;
11519
11520 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
11521 == GF_OMP_FOR_KIND_FOR);
11522 if (fd.chunk_size == NULL
11523 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
11524 fd.chunk_size = integer_zero_node;
11525 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
11526 switch (fd.sched_kind)
11527 {
11528 case OMP_CLAUSE_SCHEDULE_RUNTIME:
11529 fn_index = 3;
11530 break;
11531 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
11532 case OMP_CLAUSE_SCHEDULE_GUIDED:
11533 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
11534 && !fd.ordered
11535 && !fd.have_ordered)
11536 {
11537 fn_index = 3 + fd.sched_kind;
11538 break;
11539 }
11540 /* FALLTHRU */
11541 default:
11542 fn_index = fd.sched_kind;
11543 break;
11544 }
11545 if (!fd.ordered)
11546 fn_index += fd.have_ordered * 6;
11547 if (fd.ordered)
11548 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
11549 else
11550 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
11551 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
11552 if (fd.iter_type == long_long_unsigned_type_node)
11553 {
11554 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
11555 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
11556 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
11557 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
11558 }
11559 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
11560 (enum built_in_function) next_ix, inner_stmt);
11561 }
11562
11563 if (gimple_in_ssa_p (cfun))
11564 update_ssa (TODO_update_ssa_only_virtuals);
11565 }
11566
11567
11568 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
11569
11570 v = GOMP_sections_start (n);
11571 L0:
11572 switch (v)
11573 {
11574 case 0:
11575 goto L2;
11576 case 1:
11577 section 1;
11578 goto L1;
11579 case 2:
11580 ...
11581 case n:
11582 ...
11583 default:
11584 abort ();
11585 }
11586 L1:
11587 v = GOMP_sections_next ();
11588 goto L0;
11589 L2:
11590 reduction;
11591
11592 If this is a combined parallel sections, replace the call to
11593 GOMP_sections_start with call to GOMP_sections_next. */
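/* Illustrative sketch added for clarity (hypothetical two-section input,
   not verbatim output):

     #pragma omp sections
     {
       #pragma omp section
	 a ();
       #pragma omp section
	 b ();
     }

   Here n == 2; switch cases 1 and 2 run a () and b (), case 0 (no more
   work) jumps to L2, and GOMP_sections_next keeps returning further section
   numbers until every section has been handed out.  */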
11594
11595 static void
11596 expand_omp_sections (struct omp_region *region)
11597 {
11598 tree t, u, vin = NULL, vmain, vnext, l2;
11599 unsigned len;
11600 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
11601 gimple_stmt_iterator si, switch_si;
11602 gomp_sections *sections_stmt;
11603 gimple *stmt;
11604 gomp_continue *cont;
11605 edge_iterator ei;
11606 edge e;
11607 struct omp_region *inner;
11608 unsigned i, casei;
11609 bool exit_reachable = region->cont != NULL;
11610
11611 gcc_assert (region->exit != NULL);
11612 entry_bb = region->entry;
11613 l0_bb = single_succ (entry_bb);
11614 l1_bb = region->cont;
11615 l2_bb = region->exit;
11616 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
11617 l2 = gimple_block_label (l2_bb);
11618 else
11619 {
11620 /* This can happen if there are reductions. */
11621 len = EDGE_COUNT (l0_bb->succs);
11622 gcc_assert (len > 0);
11623 e = EDGE_SUCC (l0_bb, len - 1);
11624 si = gsi_last_bb (e->dest);
11625 l2 = NULL_TREE;
11626 if (gsi_end_p (si)
11627 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
11628 l2 = gimple_block_label (e->dest);
11629 else
11630 FOR_EACH_EDGE (e, ei, l0_bb->succs)
11631 {
11632 si = gsi_last_bb (e->dest);
11633 if (gsi_end_p (si)
11634 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
11635 {
11636 l2 = gimple_block_label (e->dest);
11637 break;
11638 }
11639 }
11640 }
11641 if (exit_reachable)
11642 default_bb = create_empty_bb (l1_bb->prev_bb);
11643 else
11644 default_bb = create_empty_bb (l0_bb);
11645
11646   /* We will build a switch() with enough cases for all the
11647      GIMPLE_OMP_SECTION regions, a '0' case to handle running out of work,
11648      and a default case to abort if something goes wrong.  */
11649 len = EDGE_COUNT (l0_bb->succs);
11650
11651 /* Use vec::quick_push on label_vec throughout, since we know the size
11652 in advance. */
11653 auto_vec<tree> label_vec (len);
11654
11655 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
11656 GIMPLE_OMP_SECTIONS statement. */
11657 si = gsi_last_bb (entry_bb);
11658 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
11659 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
11660 vin = gimple_omp_sections_control (sections_stmt);
11661 if (!is_combined_parallel (region))
11662 {
11663 /* If we are not inside a combined parallel+sections region,
11664 call GOMP_sections_start. */
11665 t = build_int_cst (unsigned_type_node, len - 1);
11666 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
11667 stmt = gimple_build_call (u, 1, t);
11668 }
11669 else
11670 {
11671 /* Otherwise, call GOMP_sections_next. */
11672 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
11673 stmt = gimple_build_call (u, 0);
11674 }
11675 gimple_call_set_lhs (stmt, vin);
11676 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
11677 gsi_remove (&si, true);
11678
11679 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
11680 L0_BB. */
11681 switch_si = gsi_last_bb (l0_bb);
11682 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
11683 if (exit_reachable)
11684 {
11685 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
11686 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
11687 vmain = gimple_omp_continue_control_use (cont);
11688 vnext = gimple_omp_continue_control_def (cont);
11689 }
11690 else
11691 {
11692 vmain = vin;
11693 vnext = NULL_TREE;
11694 }
11695
11696 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
11697 label_vec.quick_push (t);
11698 i = 1;
11699
11700 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
11701 for (inner = region->inner, casei = 1;
11702 inner;
11703 inner = inner->next, i++, casei++)
11704 {
11705 basic_block s_entry_bb, s_exit_bb;
11706
11707 /* Skip optional reduction region. */
11708 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
11709 {
11710 --i;
11711 --casei;
11712 continue;
11713 }
11714
11715 s_entry_bb = inner->entry;
11716 s_exit_bb = inner->exit;
11717
11718 t = gimple_block_label (s_entry_bb);
11719 u = build_int_cst (unsigned_type_node, casei);
11720 u = build_case_label (u, NULL, t);
11721 label_vec.quick_push (u);
11722
11723 si = gsi_last_bb (s_entry_bb);
11724 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
11725 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
11726 gsi_remove (&si, true);
11727 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
11728
11729 if (s_exit_bb == NULL)
11730 continue;
11731
11732 si = gsi_last_bb (s_exit_bb);
11733 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
11734 gsi_remove (&si, true);
11735
11736 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
11737 }
11738
11739 /* Error handling code goes in DEFAULT_BB. */
11740 t = gimple_block_label (default_bb);
11741 u = build_case_label (NULL, NULL, t);
11742 make_edge (l0_bb, default_bb, 0);
11743 add_bb_to_loop (default_bb, current_loops->tree_root);
11744
11745 stmt = gimple_build_switch (vmain, u, label_vec);
11746 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
11747 gsi_remove (&switch_si, true);
11748
11749 si = gsi_start_bb (default_bb);
11750 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
11751 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
11752
11753 if (exit_reachable)
11754 {
11755 tree bfn_decl;
11756
11757 /* Code to get the next section goes in L1_BB. */
11758 si = gsi_last_bb (l1_bb);
11759 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
11760
11761 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
11762 stmt = gimple_build_call (bfn_decl, 0);
11763 gimple_call_set_lhs (stmt, vnext);
11764 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
11765 gsi_remove (&si, true);
11766
11767 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
11768 }
11769
11770 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
11771 si = gsi_last_bb (l2_bb);
11772 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
11773 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
11774 else if (gimple_omp_return_lhs (gsi_stmt (si)))
11775 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
11776 else
11777 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
11778 stmt = gimple_build_call (t, 0);
11779 if (gimple_omp_return_lhs (gsi_stmt (si)))
11780 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
11781 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
11782 gsi_remove (&si, true);
11783
11784 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
11785 }
11786
11787
11788 /* Expand code for an OpenMP single directive. We've already expanded
11789 much of the code, here we simply place the GOMP_barrier call. */
11790
11791 static void
11792 expand_omp_single (struct omp_region *region)
11793 {
11794 basic_block entry_bb, exit_bb;
11795 gimple_stmt_iterator si;
11796
11797 entry_bb = region->entry;
11798 exit_bb = region->exit;
11799
11800 si = gsi_last_bb (entry_bb);
11801 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
11802 gsi_remove (&si, true);
11803 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
11804
11805 si = gsi_last_bb (exit_bb);
11806 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
11807 {
11808 tree t = gimple_omp_return_lhs (gsi_stmt (si));
11809 gsi_insert_after (&si, build_omp_barrier (t), GSI_SAME_STMT);
11810 }
11811 gsi_remove (&si, true);
11812 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
11813 }
11814
11815
11816 /* Generic expansion for OpenMP synchronization directives: master,
11817 ordered and critical. All we need to do here is remove the entry
11818 and exit markers for REGION. */
11819
11820 static void
11821 expand_omp_synch (struct omp_region *region)
11822 {
11823 basic_block entry_bb, exit_bb;
11824 gimple_stmt_iterator si;
11825
11826 entry_bb = region->entry;
11827 exit_bb = region->exit;
11828
11829 si = gsi_last_bb (entry_bb);
11830 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
11831 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
11832 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
11833 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
11834 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
11835 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
11836 gsi_remove (&si, true);
11837 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
11838
11839 if (exit_bb)
11840 {
11841 si = gsi_last_bb (exit_bb);
11842 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
11843 gsi_remove (&si, true);
11844 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
11845 }
11846 }
11847
11848 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
11849 operation as a normal volatile load. */
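/* Illustrative sketch added for clarity (assuming a 4-byte type, so INDEX
   selects __atomic_load_4; not verbatim output):

     #pragma omp atomic read
       v = *p;

   becomes roughly

     v = __atomic_load_4 (p, MEMMODEL_RELAXED);

   with MEMMODEL_SEQ_CST used instead when the seq_cst clause is present.  */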
11850
11851 static bool
11852 expand_omp_atomic_load (basic_block load_bb, tree addr,
11853 tree loaded_val, int index)
11854 {
11855 enum built_in_function tmpbase;
11856 gimple_stmt_iterator gsi;
11857 basic_block store_bb;
11858 location_t loc;
11859 gimple *stmt;
11860 tree decl, call, type, itype;
11861
11862 gsi = gsi_last_bb (load_bb);
11863 stmt = gsi_stmt (gsi);
11864 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
11865 loc = gimple_location (stmt);
11866
11867 /* ??? If the target does not implement atomic_load_optab[mode], and mode
11868 is smaller than word size, then expand_atomic_load assumes that the load
11869 is atomic. We could avoid the builtin entirely in this case. */
11870
11871 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
11872 decl = builtin_decl_explicit (tmpbase);
11873 if (decl == NULL_TREE)
11874 return false;
11875
11876 type = TREE_TYPE (loaded_val);
11877 itype = TREE_TYPE (TREE_TYPE (decl));
11878
11879 call = build_call_expr_loc (loc, decl, 2, addr,
11880 build_int_cst (NULL,
11881 gimple_omp_atomic_seq_cst_p (stmt)
11882 ? MEMMODEL_SEQ_CST
11883 : MEMMODEL_RELAXED));
11884 if (!useless_type_conversion_p (type, itype))
11885 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
11886 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
11887
11888 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
11889 gsi_remove (&gsi, true);
11890
11891 store_bb = single_succ (load_bb);
11892 gsi = gsi_last_bb (store_bb);
11893 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
11894 gsi_remove (&gsi, true);
11895
11896 if (gimple_in_ssa_p (cfun))
11897 update_ssa (TODO_update_ssa_no_phi);
11898
11899 return true;
11900 }
11901
11902 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
11903 operation as a normal volatile store. */
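/* Illustrative sketch added for clarity (again assuming a 4-byte type; not
   verbatim output):

     #pragma omp atomic write
       *p = x;

   becomes roughly

     __atomic_store_4 (p, x, MEMMODEL_RELAXED);

   and when the old value is also needed, the exchange path below emits
   __atomic_exchange_4 and assigns its result to the captured variable.  */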
11904
11905 static bool
11906 expand_omp_atomic_store (basic_block load_bb, tree addr,
11907 tree loaded_val, tree stored_val, int index)
11908 {
11909 enum built_in_function tmpbase;
11910 gimple_stmt_iterator gsi;
11911 basic_block store_bb = single_succ (load_bb);
11912 location_t loc;
11913 gimple *stmt;
11914 tree decl, call, type, itype;
11915 machine_mode imode;
11916 bool exchange;
11917
11918 gsi = gsi_last_bb (load_bb);
11919 stmt = gsi_stmt (gsi);
11920 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
11921
11922 /* If the load value is needed, then this isn't a store but an exchange. */
11923 exchange = gimple_omp_atomic_need_value_p (stmt);
11924
11925 gsi = gsi_last_bb (store_bb);
11926 stmt = gsi_stmt (gsi);
11927 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
11928 loc = gimple_location (stmt);
11929
11930 /* ??? If the target does not implement atomic_store_optab[mode], and mode
11931 is smaller than word size, then expand_atomic_store assumes that the store
11932 is atomic. We could avoid the builtin entirely in this case. */
11933
11934 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
11935 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
11936 decl = builtin_decl_explicit (tmpbase);
11937 if (decl == NULL_TREE)
11938 return false;
11939
11940 type = TREE_TYPE (stored_val);
11941
11942 /* Dig out the type of the function's second argument. */
11943 itype = TREE_TYPE (decl);
11944 itype = TYPE_ARG_TYPES (itype);
11945 itype = TREE_CHAIN (itype);
11946 itype = TREE_VALUE (itype);
11947 imode = TYPE_MODE (itype);
11948
11949 if (exchange && !can_atomic_exchange_p (imode, true))
11950 return false;
11951
11952 if (!useless_type_conversion_p (itype, type))
11953 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
11954 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
11955 build_int_cst (NULL,
11956 gimple_omp_atomic_seq_cst_p (stmt)
11957 ? MEMMODEL_SEQ_CST
11958 : MEMMODEL_RELAXED));
11959 if (exchange)
11960 {
11961 if (!useless_type_conversion_p (type, itype))
11962 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
11963 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
11964 }
11965
11966 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
11967 gsi_remove (&gsi, true);
11968
11969 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
11970 gsi = gsi_last_bb (load_bb);
11971 gsi_remove (&gsi, true);
11972
11973 if (gimple_in_ssa_p (cfun))
11974 update_ssa (TODO_update_ssa_no_phi);
11975
11976 return true;
11977 }
11978
11979 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
11980 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
11981 size of the data type, and thus usable to find the index of the builtin
11982 decl. Returns false if the expression is not of the proper form. */
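/* Illustrative sketch added for clarity (4-byte integer operand assumed; not
   verbatim output):

     #pragma omp atomic
       *p += x;

   collapses the load/assign/store trio into a single

     __atomic_fetch_add_4 (p, x, MEMMODEL_RELAXED);

   and when the updated value is captured, the __atomic_add_fetch_4 variant
   is chosen instead (NEWBASE rather than OLDBASE below).  */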
11983
11984 static bool
11985 expand_omp_atomic_fetch_op (basic_block load_bb,
11986 tree addr, tree loaded_val,
11987 tree stored_val, int index)
11988 {
11989 enum built_in_function oldbase, newbase, tmpbase;
11990 tree decl, itype, call;
11991 tree lhs, rhs;
11992 basic_block store_bb = single_succ (load_bb);
11993 gimple_stmt_iterator gsi;
11994 gimple *stmt;
11995 location_t loc;
11996 enum tree_code code;
11997 bool need_old, need_new;
11998 machine_mode imode;
11999 bool seq_cst;
12000
12001 /* We expect to find the following sequences:
12002
12003 load_bb:
12004 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
12005
12006 store_bb:
12007 val = tmp OP something; (or: something OP tmp)
12008 GIMPLE_OMP_STORE (val)
12009
12010      ??? FIXME: Allow a more flexible sequence.
12011 Perhaps use data flow to pick the statements.
12012
12013 */
12014
12015 gsi = gsi_after_labels (store_bb);
12016 stmt = gsi_stmt (gsi);
12017 loc = gimple_location (stmt);
12018 if (!is_gimple_assign (stmt))
12019 return false;
12020 gsi_next (&gsi);
12021 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
12022 return false;
12023 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
12024 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
12025 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
12026 gcc_checking_assert (!need_old || !need_new);
12027
12028 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
12029 return false;
12030
12031 /* Check for one of the supported fetch-op operations. */
12032 code = gimple_assign_rhs_code (stmt);
12033 switch (code)
12034 {
12035 case PLUS_EXPR:
12036 case POINTER_PLUS_EXPR:
12037 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
12038 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
12039 break;
12040 case MINUS_EXPR:
12041 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
12042 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
12043 break;
12044 case BIT_AND_EXPR:
12045 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
12046 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
12047 break;
12048 case BIT_IOR_EXPR:
12049 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
12050 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
12051 break;
12052 case BIT_XOR_EXPR:
12053 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
12054 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
12055 break;
12056 default:
12057 return false;
12058 }
12059
12060 /* Make sure the expression is of the proper form. */
12061 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
12062 rhs = gimple_assign_rhs2 (stmt);
12063 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
12064 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
12065 rhs = gimple_assign_rhs1 (stmt);
12066 else
12067 return false;
12068
12069 tmpbase = ((enum built_in_function)
12070 ((need_new ? newbase : oldbase) + index + 1));
12071 decl = builtin_decl_explicit (tmpbase);
12072 if (decl == NULL_TREE)
12073 return false;
12074 itype = TREE_TYPE (TREE_TYPE (decl));
12075 imode = TYPE_MODE (itype);
12076
12077   /* We could test all of the various optabs involved, but the fact of the
12078      matter is that (with the exception of i486 vs i586 and xadd) all targets
12079      that support any atomic operation optab also implement compare-and-swap.
12080      Let optabs.c take care of expanding any compare-and-swap loop.  */
12081 if (!can_compare_and_swap_p (imode, true))
12082 return false;
12083
12084 gsi = gsi_last_bb (load_bb);
12085 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
12086
12087 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
12088 It only requires that the operation happen atomically. Thus we can
12089 use the RELAXED memory model. */
12090 call = build_call_expr_loc (loc, decl, 3, addr,
12091 fold_convert_loc (loc, itype, rhs),
12092 build_int_cst (NULL,
12093 seq_cst ? MEMMODEL_SEQ_CST
12094 : MEMMODEL_RELAXED));
12095
12096 if (need_old || need_new)
12097 {
12098 lhs = need_old ? loaded_val : stored_val;
12099 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
12100 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
12101 }
12102 else
12103 call = fold_convert_loc (loc, void_type_node, call);
12104 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
12105 gsi_remove (&gsi, true);
12106
12107 gsi = gsi_last_bb (store_bb);
12108 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
12109 gsi_remove (&gsi, true);
12110 gsi = gsi_last_bb (store_bb);
12111 stmt = gsi_stmt (gsi);
12112 gsi_remove (&gsi, true);
12113
12114 if (gimple_in_ssa_p (cfun))
12115 {
12116 release_defs (stmt);
12117 update_ssa (TODO_update_ssa_no_phi);
12118 }
12119
12120 return true;
12121 }
12122
12123 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
12124
12125 oldval = *addr;
12126 repeat:
12127 newval = rhs; // with oldval replacing *addr in rhs
12128 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
12129 if (oldval != newval)
12130 goto repeat;
12131
12132 INDEX is log2 of the size of the data type, and thus usable to find the
12133 index of the builtin decl. */
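/* Illustrative sketch added for clarity (hypothetical float operand, where
   the fetch-op path above does not apply; not verbatim output):

     #pragma omp atomic
       *p *= x;

   expands to roughly

     expected = VIEW_CONVERT (int, *p);
     repeat:
       desired = VIEW_CONVERT (int, VIEW_CONVERT (float, expected) * x);
       old = __sync_val_compare_and_swap_4 ((int *) p, expected, desired);
       if (old != expected) { expected = old; goto repeat; }

   performing the comparison on the integer view so it behaves sanely for
   NaNs and -0.0.  */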
12134
12135 static bool
12136 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
12137 tree addr, tree loaded_val, tree stored_val,
12138 int index)
12139 {
12140 tree loadedi, storedi, initial, new_storedi, old_vali;
12141 tree type, itype, cmpxchg, iaddr;
12142 gimple_stmt_iterator si;
12143 basic_block loop_header = single_succ (load_bb);
12144 gimple *phi, *stmt;
12145 edge e;
12146 enum built_in_function fncode;
12147
12148 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
12149 order to use the RELAXED memory model effectively. */
12150 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
12151 + index + 1);
12152 cmpxchg = builtin_decl_explicit (fncode);
12153 if (cmpxchg == NULL_TREE)
12154 return false;
12155 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
12156 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
12157
12158 if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
12159 return false;
12160
12161 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
12162 si = gsi_last_bb (load_bb);
12163 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
12164
12165 /* For floating-point values, we'll need to view-convert them to integers
12166 so that we can perform the atomic compare and swap. Simplify the
12167 following code by always setting up the "i"ntegral variables. */
12168 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
12169 {
12170 tree iaddr_val;
12171
12172 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
12173 true));
12174 iaddr_val
12175 = force_gimple_operand_gsi (&si,
12176 fold_convert (TREE_TYPE (iaddr), addr),
12177 false, NULL_TREE, true, GSI_SAME_STMT);
12178 stmt = gimple_build_assign (iaddr, iaddr_val);
12179 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12180 loadedi = create_tmp_var (itype);
12181 if (gimple_in_ssa_p (cfun))
12182 loadedi = make_ssa_name (loadedi);
12183 }
12184 else
12185 {
12186 iaddr = addr;
12187 loadedi = loaded_val;
12188 }
12189
12190 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
12191 tree loaddecl = builtin_decl_explicit (fncode);
12192 if (loaddecl)
12193 initial
12194 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
12195 build_call_expr (loaddecl, 2, iaddr,
12196 build_int_cst (NULL_TREE,
12197 MEMMODEL_RELAXED)));
12198 else
12199 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
12200 build_int_cst (TREE_TYPE (iaddr), 0));
12201
12202 initial
12203 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
12204 GSI_SAME_STMT);
12205
12206 /* Move the value to the LOADEDI temporary. */
12207 if (gimple_in_ssa_p (cfun))
12208 {
12209 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
12210 phi = create_phi_node (loadedi, loop_header);
12211 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
12212 initial);
12213 }
12214 else
12215 gsi_insert_before (&si,
12216 gimple_build_assign (loadedi, initial),
12217 GSI_SAME_STMT);
12218 if (loadedi != loaded_val)
12219 {
12220 gimple_stmt_iterator gsi2;
12221 tree x;
12222
12223 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
12224 gsi2 = gsi_start_bb (loop_header);
12225 if (gimple_in_ssa_p (cfun))
12226 {
12227 gassign *stmt;
12228 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
12229 true, GSI_SAME_STMT);
12230 stmt = gimple_build_assign (loaded_val, x);
12231 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
12232 }
12233 else
12234 {
12235 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
12236 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
12237 true, GSI_SAME_STMT);
12238 }
12239 }
12240 gsi_remove (&si, true);
12241
12242 si = gsi_last_bb (store_bb);
12243 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
12244
12245 if (iaddr == addr)
12246 storedi = stored_val;
12247 else
12248 storedi =
12249 force_gimple_operand_gsi (&si,
12250 build1 (VIEW_CONVERT_EXPR, itype,
12251 stored_val), true, NULL_TREE, true,
12252 GSI_SAME_STMT);
12253
12254 /* Build the compare&swap statement. */
12255 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
12256 new_storedi = force_gimple_operand_gsi (&si,
12257 fold_convert (TREE_TYPE (loadedi),
12258 new_storedi),
12259 true, NULL_TREE,
12260 true, GSI_SAME_STMT);
12261
12262 if (gimple_in_ssa_p (cfun))
12263 old_vali = loadedi;
12264 else
12265 {
12266 old_vali = create_tmp_var (TREE_TYPE (loadedi));
12267 stmt = gimple_build_assign (old_vali, loadedi);
12268 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12269
12270 stmt = gimple_build_assign (loadedi, new_storedi);
12271 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12272 }
12273
12274 /* Note that we always perform the comparison as an integer, even for
12275 floating point. This allows the atomic operation to properly
12276 succeed even with NaNs and -0.0. */
12277 stmt = gimple_build_cond_empty
12278 (build2 (NE_EXPR, boolean_type_node,
12279 new_storedi, old_vali));
12280 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12281
12282 /* Update cfg. */
12283 e = single_succ_edge (store_bb);
12284 e->flags &= ~EDGE_FALLTHRU;
12285 e->flags |= EDGE_FALSE_VALUE;
12286
12287 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
12288
12289 /* Copy the new value to loadedi (we already did that before the condition
12290 if we are not in SSA). */
12291 if (gimple_in_ssa_p (cfun))
12292 {
12293 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
12294 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
12295 }
12296
12297 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
12298 gsi_remove (&si, true);
12299
12300 struct loop *loop = alloc_loop ();
12301 loop->header = loop_header;
12302 loop->latch = store_bb;
12303 add_loop (loop, loop_header->loop_father);
12304
12305 if (gimple_in_ssa_p (cfun))
12306 update_ssa (TODO_update_ssa_no_phi);
12307
12308 return true;
12309 }
12310
12311 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
12312
12313 GOMP_atomic_start ();
12314 *addr = rhs;
12315 GOMP_atomic_end ();
12316
12317 The result is not globally atomic, but works so long as all parallel
12318 references are within #pragma omp atomic directives. According to
12319 responses received from omp@openmp.org, this appears to be within spec,
12320 which makes sense, since that's how several other compilers handle
12321 this situation as well.
12322 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
12323 expanding. STORED_VAL is the operand of the matching
12324 GIMPLE_OMP_ATOMIC_STORE.
12325
12326 We replace
12327 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
12328 loaded_val = *addr;
12329
12330 and replace
12331 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
12332 *addr = stored_val;
12333 */
12334
12335 static bool
12336 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
12337 tree addr, tree loaded_val, tree stored_val)
12338 {
12339 gimple_stmt_iterator si;
12340 gassign *stmt;
12341 tree t;
12342
12343 si = gsi_last_bb (load_bb);
12344 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
12345
12346 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
12347 t = build_call_expr (t, 0);
12348 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
12349
12350 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
12351 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12352 gsi_remove (&si, true);
12353
12354 si = gsi_last_bb (store_bb);
12355 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
12356
12357 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
12358 stored_val);
12359 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
12360
12361 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
12362 t = build_call_expr (t, 0);
12363 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
12364 gsi_remove (&si, true);
12365
12366 if (gimple_in_ssa_p (cfun))
12367 update_ssa (TODO_update_ssa_no_phi);
12368 return true;
12369 }
12370
12371 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand it
12372 using expand_omp_atomic_fetch_op. If that fails, we try to
12373 call expand_omp_atomic_pipeline, and if that fails too, the
12374 ultimate fallback is wrapping the operation in a mutex
12375 (expand_omp_atomic_mutex). REGION is the atomic region built
12376 by build_omp_regions_1(). */
12377
12378 static void
12379 expand_omp_atomic (struct omp_region *region)
12380 {
12381 basic_block load_bb = region->entry, store_bb = region->exit;
12382 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
12383 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
12384 tree loaded_val = gimple_omp_atomic_load_lhs (load);
12385 tree addr = gimple_omp_atomic_load_rhs (load);
12386 tree stored_val = gimple_omp_atomic_store_val (store);
12387 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
12388 HOST_WIDE_INT index;
12389
12390 /* Make sure the type is one of the supported sizes. */
12391 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
12392 index = exact_log2 (index);
12393 if (index >= 0 && index <= 4)
12394 {
12395 unsigned int align = TYPE_ALIGN_UNIT (type);
12396
12397 /* __sync builtins require strict data alignment. */
12398 if (exact_log2 (align) >= index)
12399 {
12400 /* Atomic load. */
12401 if (loaded_val == stored_val
12402 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
12403 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
12404 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
12405 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
12406 return;
12407
12408 /* Atomic store. */
12409 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
12410 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
12411 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
12412 && store_bb == single_succ (load_bb)
12413 && first_stmt (store_bb) == store
12414 && expand_omp_atomic_store (load_bb, addr, loaded_val,
12415 stored_val, index))
12416 return;
12417
12418 /* When possible, use specialized atomic update functions. */
12419 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
12420 && store_bb == single_succ (load_bb)
12421 && expand_omp_atomic_fetch_op (load_bb, addr,
12422 loaded_val, stored_val, index))
12423 return;
12424
12425 /* If we don't have specialized __sync builtins, try and implement
12426 as a compare and swap loop. */
12427 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
12428 loaded_val, stored_val, index))
12429 return;
12430 }
12431 }
12432
12433 /* The ultimate fallback is wrapping the operation in a mutex. */
12434 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
12435 }
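
/* For illustration only (a hedged sketch, not emitted verbatim by any of
   the expanders): given a simple update such as

     #pragma omp atomic
     x = x + 1;

   expand_omp_atomic_fetch_op would typically end up emitting a single
   atomic fetch-and-add builtin, expand_omp_atomic_pipeline the
   compare-and-swap loop sketched in its header comment, and
   expand_omp_atomic_mutex the plain update bracketed by
   GOMP_atomic_start/GOMP_atomic_end. Which strategy is chosen depends on
   the type, its alignment and the builtins the target provides, exactly
   as tested above. */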
12436
12437
12438 /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK
12439 macro in gomp-constants.h. We do not check for overflow. */
12440
12441 static tree
12442 oacc_launch_pack (unsigned code, tree device, unsigned op)
12443 {
12444 tree res;
12445
12446 res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op));
12447 if (device)
12448 {
12449 device = fold_build2 (LSHIFT_EXPR, unsigned_type_node,
12450 device, build_int_cst (unsigned_type_node,
12451 GOMP_LAUNCH_DEVICE_SHIFT));
12452 res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device);
12453 }
12454 return res;
12455 }
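
/* Sketch of the launch word built above. The authoritative layout is
   GOMP_LAUNCH_PACK in gomp-constants.h; roughly, OP occupies the low
   bits, DEVICE is OR-ed in at GOMP_LAUNCH_DEVICE_SHIFT and CODE sits
   above both:

     res = GOMP_LAUNCH_PACK (code, 0, op)
           | (device << GOMP_LAUNCH_DEVICE_SHIFT);

   DEVICE is only folded in when non-NULL, since most tags leave the
   device field unused. */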
12456
12457 /* Look for compute grid dimension clauses and convert to an attribute
12458 attached to FN. This permits the target-side code to (a) massage
12459 the dimensions, (b) emit that data and (c) optimize. Non-constant
12460 dimensions are pushed onto ARGS.
12461
12462 The attribute value is a TREE_LIST. A set of dimensions is
12463 represented as a list of INTEGER_CST. Those that are runtime
12464 exprs are represented as an INTEGER_CST of zero.
12465
12466 TODO. Normally the attribute will just contain a single such list. If
12467 however it contains a list of lists, this will represent the use of
12468 device_type. Each member of the outer list is an assoc list of
12469 dimensions, keyed by the device type. The first entry will be the
12470 default. Well, that's the plan. */
12471
12472 #define OACC_FN_ATTRIB "oacc function"
12473
12474 /* Replace any existing oacc fn attribute with updated dimensions. */
12475
12476 void
12477 replace_oacc_fn_attrib (tree fn, tree dims)
12478 {
12479 tree ident = get_identifier (OACC_FN_ATTRIB);
12480 tree attribs = DECL_ATTRIBUTES (fn);
12481
12482 /* If we happen to be present as the first attrib, drop it. */
12483 if (attribs && TREE_PURPOSE (attribs) == ident)
12484 attribs = TREE_CHAIN (attribs);
12485 DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, attribs);
12486 }
12487
12488 /* Scan CLAUSES for launch dimensions and attach them to the oacc
12489 function attribute. Push any that are non-constant onto the ARGS
12490 list, along with an appropriate GOMP_LAUNCH_DIM tag. IS_KERNEL is
12491 true if these are for a kernels region offload function. */
12492
12493 void
12494 set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args)
12495 {
12496 /* Must match GOMP_DIM ordering. */
12497 static const omp_clause_code ids[]
12498 = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS,
12499 OMP_CLAUSE_VECTOR_LENGTH };
12500 unsigned ix;
12501 tree dims[GOMP_DIM_MAX];
12502 tree attr = NULL_TREE;
12503 unsigned non_const = 0;
12504
12505 for (ix = GOMP_DIM_MAX; ix--;)
12506 {
12507 tree clause = find_omp_clause (clauses, ids[ix]);
12508 tree dim = NULL_TREE;
12509
12510 if (clause)
12511 dim = OMP_CLAUSE_EXPR (clause, ids[ix]);
12512 dims[ix] = dim;
12513 if (dim && TREE_CODE (dim) != INTEGER_CST)
12514 {
12515 dim = integer_zero_node;
12516 non_const |= GOMP_DIM_MASK (ix);
12517 }
12518 attr = tree_cons (NULL_TREE, dim, attr);
12519 /* Note kernelness with TREE_PUBLIC. */
12520 if (is_kernel)
12521 TREE_PUBLIC (attr) = 1;
12522 }
12523
12524 replace_oacc_fn_attrib (fn, attr);
12525
12526 if (non_const)
12527 {
12528 /* Push a dynamic argument set. */
12529 args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM,
12530 NULL_TREE, non_const));
12531 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
12532 if (non_const & GOMP_DIM_MASK (ix))
12533 args->safe_push (dims[ix]);
12534 }
12535 }
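
/* A plausible illustration (clause values invented for the example): for

     num_gangs (32) num_workers (w) vector_length (128)

   the "oacc function" attribute value becomes the list {32, 0, 128} in
   GOMP_DIM order, the worker entry being zero because W is not an
   INTEGER_CST, and ARGS receives a GOMP_LAUNCH_DIM tag whose operand has
   only the worker bit of GOMP_DIM_MASK set, followed by the expression W
   itself. */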
12536
12537 /* Process the routine's dimension clauses to generate an attribute
12538 value. Issue diagnostics as appropriate. We default to SEQ
12539 (OpenACC 2.5 clarifies this). All dimensions have a size of zero
12540 (dynamic). TREE_PURPOSE is set to indicate whether that dimension
12541 can have a loop partitioned on it. Non-zero indicates
12542 yes, zero indicates no. By construction, once a non-zero has been
12543 reached, further inner dimensions must also be non-zero. We set
12544 TREE_VALUE to zero for the dimensions that may be partitioned and
12545 1 for the other ones -- if a loop is (erroneously) spawned at
12546 an outer level, we don't want to try and partition it. */
12547
12548 tree
12549 build_oacc_routine_dims (tree clauses)
12550 {
12551 /* Must match GOMP_DIM ordering. */
12552 static const omp_clause_code ids[] =
12553 {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ};
12554 int ix;
12555 int level = -1;
12556
12557 for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses))
12558 for (ix = GOMP_DIM_MAX + 1; ix--;)
12559 if (OMP_CLAUSE_CODE (clauses) == ids[ix])
12560 {
12561 if (level >= 0)
12562 error_at (OMP_CLAUSE_LOCATION (clauses),
12563 "multiple loop axes specified for routine");
12564 level = ix;
12565 break;
12566 }
12567
12568 /* Default to SEQ. */
12569 if (level < 0)
12570 level = GOMP_DIM_MAX;
12571
12572 tree dims = NULL_TREE;
12573
12574 for (ix = GOMP_DIM_MAX; ix--;)
12575 dims = tree_cons (build_int_cst (boolean_type_node, ix >= level),
12576 build_int_cst (integer_type_node, ix < level), dims);
12577
12578 return dims;
12579 }
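
/* Example: for "#pragma acc routine worker" the level is the worker
   dimension, so the resulting list is, in GOMP_DIM order,

     gang: TREE_PURPOSE 0, TREE_VALUE 1
     worker: TREE_PURPOSE 1, TREE_VALUE 0
     vector: TREE_PURPOSE 1, TREE_VALUE 0

   i.e. loops inside the routine may be partitioned across workers and
   vectors, but not across gangs. */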
12580
12581 /* Retrieve the oacc function attrib and return it. Non-oacc
12582 functions will return NULL. */
12583
12584 tree
12585 get_oacc_fn_attrib (tree fn)
12586 {
12587 return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
12588 }
12589
12590 /* Return true if this oacc fn attrib is for a kernels offload
12591 region. We use the TREE_PUBLIC flag of each dimension -- only
12592 need to check the first one. */
12593
12594 bool
12595 oacc_fn_attrib_kernels_p (tree attr)
12596 {
12597 return TREE_PUBLIC (TREE_VALUE (attr));
12598 }
12599
12600 /* Return the level at which an oacc routine may spawn a partitioned loop, or
12601 -1 if it is not a routine (i.e. is an offload fn). */
12602
12603 static int
12604 oacc_fn_attrib_level (tree attr)
12605 {
12606 tree pos = TREE_VALUE (attr);
12607
12608 if (!TREE_PURPOSE (pos))
12609 return -1;
12610
12611 int ix = 0;
12612 for (ix = 0; ix != GOMP_DIM_MAX;
12613 ix++, pos = TREE_CHAIN (pos))
12614 if (!integer_zerop (TREE_PURPOSE (pos)))
12615 break;
12616
12617 return ix;
12618 }
12619
12620 /* Extract an oacc execution dimension from FN. FN must be an
12621 offloaded function or routine that has already had its execution
12622 dimensions lowered to the target-specific values. */
12623
12624 int
12625 get_oacc_fn_dim_size (tree fn, int axis)
12626 {
12627 tree attrs = get_oacc_fn_attrib (fn);
12628
12629 gcc_assert (axis < GOMP_DIM_MAX);
12630
12631 tree dims = TREE_VALUE (attrs);
12632 while (axis--)
12633 dims = TREE_CHAIN (dims);
12634
12635 int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
12636
12637 return size;
12638 }
12639
12640 /* Extract the dimension axis from an IFN_GOACC_DIM_POS or
12641 IFN_GOACC_DIM_SIZE call. */
12642
12643 int
12644 get_oacc_ifn_dim_arg (const gimple *stmt)
12645 {
12646 gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE
12647 || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS);
12648 tree arg = gimple_call_arg (stmt, 0);
12649 HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg);
12650
12651 gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX);
12652 return (int) axis;
12653 }
12654
12655 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
12656 at REGION_EXIT. */
12657
12658 static void
12659 mark_loops_in_oacc_kernels_region (basic_block region_entry,
12660 basic_block region_exit)
12661 {
12662 struct loop *outer = region_entry->loop_father;
12663 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
12664
12665 /* Don't parallelize the kernels region if it contains more than one outer
12666 loop. */
12667 unsigned int nr_outer_loops = 0;
12668 struct loop *single_outer = NULL;
12669 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
12670 {
12671 gcc_assert (loop_outer (loop) == outer);
12672
12673 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
12674 continue;
12675
12676 if (region_exit != NULL
12677 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
12678 continue;
12679
12680 nr_outer_loops++;
12681 single_outer = loop;
12682 }
12683 if (nr_outer_loops != 1)
12684 return;
12685
12686 for (struct loop *loop = single_outer->inner; loop != NULL; loop = loop->inner)
12687 if (loop->next)
12688 return;
12689
12690 /* Mark the loops in the region. */
12691 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
12692 loop->in_oacc_kernels_region = true;
12693 }
12694
12695 /* Types used to pass grid and workgroup sizes to kernel invocation. */
12696
12697 struct GTY(()) grid_launch_attributes_trees
12698 {
12699 tree kernel_dim_array_type;
12700 tree kernel_lattrs_dimnum_decl;
12701 tree kernel_lattrs_grid_decl;
12702 tree kernel_lattrs_group_decl;
12703 tree kernel_launch_attributes_type;
12704 };
12705
12706 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
12707
12708 /* Create types used to pass kernel launch attributes to target. */
12709
12710 static void
12711 grid_create_kernel_launch_attr_types (void)
12712 {
12713 if (grid_attr_trees)
12714 return;
12715 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
12716
12717 tree dim_arr_index_type
12718 = build_index_type (build_int_cst (integer_type_node, 2));
12719 grid_attr_trees->kernel_dim_array_type
12720 = build_array_type (uint32_type_node, dim_arr_index_type);
12721
12722 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
12723 grid_attr_trees->kernel_lattrs_dimnum_decl
12724 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
12725 uint32_type_node);
12726 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
12727
12728 grid_attr_trees->kernel_lattrs_grid_decl
12729 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
12730 grid_attr_trees->kernel_dim_array_type);
12731 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
12732 = grid_attr_trees->kernel_lattrs_dimnum_decl;
12733 grid_attr_trees->kernel_lattrs_group_decl
12734 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
12735 grid_attr_trees->kernel_dim_array_type);
12736 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
12737 = grid_attr_trees->kernel_lattrs_grid_decl;
12738 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
12739 "__gomp_kernel_launch_attributes",
12740 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
12741 }
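
/* In C terms the record built above corresponds roughly to

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   which grid_get_kernel_launch_attributes below fills in and whose
   address is then passed to the runtime. */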
12742
12743 /* Insert before the current statement in GSI a store of VALUE to INDEX of
12744 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
12745 of type uint32_type_node. */
12746
12747 static void
12748 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
12749 tree fld_decl, int index, tree value)
12750 {
12751 tree ref = build4 (ARRAY_REF, uint32_type_node,
12752 build3 (COMPONENT_REF,
12753 grid_attr_trees->kernel_dim_array_type,
12754 range_var, fld_decl, NULL_TREE),
12755 build_int_cst (integer_type_node, index),
12756 NULL_TREE, NULL_TREE);
12757 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
12758 }
12759
12760 /* Return a tree representation of a pointer to a structure with grid and
12761 work-group size information. Statements filling that information will be
12762 inserted before GSI; TGT_STMT is the target statement which has the
12763 necessary information in it. */
12764
12765 static tree
12766 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
12767 gomp_target *tgt_stmt)
12768 {
12769 grid_create_kernel_launch_attr_types ();
12770 tree u32_one = build_one_cst (uint32_type_node);
12771 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
12772 "__kernel_launch_attrs");
12773
12774 unsigned max_dim = 0;
12775 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
12776 clause;
12777 clause = OMP_CLAUSE_CHAIN (clause))
12778 {
12779 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
12780 continue;
12781
12782 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
12783 max_dim = MAX (dim, max_dim);
12784
12785 grid_insert_store_range_dim (gsi, lattrs,
12786 grid_attr_trees->kernel_lattrs_grid_decl,
12787 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
12788 grid_insert_store_range_dim (gsi, lattrs,
12789 grid_attr_trees->kernel_lattrs_group_decl,
12790 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
12791 }
12792
12793 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
12794 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
12795 /* At this moment we cannot gridify a loop with a collapse clause. */
12796 /* TODO: Adjust when we support bigger collapse. */
12797 gcc_assert (max_dim == 0);
12798 gsi_insert_before (gsi, gimple_build_assign (dimref, u32_one), GSI_SAME_STMT);
12799 TREE_ADDRESSABLE (lattrs) = 1;
12800 return build_fold_addr_expr (lattrs);
12801 }
12802
12803 /* Build target argument identifier from the DEVICE identifier, value
12804 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
12805
12806 static tree
12807 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
12808 {
12809 tree t = build_int_cst (integer_type_node, device);
12810 if (subseqent_param)
12811 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
12812 build_int_cst (integer_type_node,
12813 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
12814 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
12815 build_int_cst (integer_type_node, id));
12816 return t;
12817 }
12818
12819 /* Like above but return it in a type that can be directly stored as an element
12820 of the argument array. */
12821
12822 static tree
12823 get_target_argument_identifier (int device, bool subseqent_param, int id)
12824 {
12825 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
12826 return fold_convert (ptr_type_node, t);
12827 }
12828
12829 /* Return a target argument consisting of DEVICE identifier, value identifier
12830 ID, and the actual VALUE. */
12831
12832 static tree
12833 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
12834 tree value)
12835 {
12836 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
12837 fold_convert (integer_type_node, value),
12838 build_int_cst (unsigned_type_node,
12839 GOMP_TARGET_ARG_VALUE_SHIFT));
12840 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
12841 get_target_argument_identifier_1 (device, false, id));
12842 t = fold_convert (ptr_type_node, t);
12843 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
12844 }
12845
12846 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
12847 push one argument to ARGS with the DEVICE, ID and VALUE embedded in it;
12848 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
12849 arguments. */
12850
12851 static void
12852 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
12853 int id, tree value, vec <tree> *args)
12854 {
12855 if (tree_fits_shwi_p (value)
12856 && tree_to_shwi (value) > -(1 << 15)
12857 && tree_to_shwi (value) < (1 << 15))
12858 args->quick_push (get_target_argument_value (gsi, device, id, value));
12859 else
12860 {
12861 args->quick_push (get_target_argument_identifier (device, true, id));
12862 value = fold_convert (ptr_type_node, value);
12863 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
12864 GSI_SAME_STMT);
12865 args->quick_push (value);
12866 }
12867 }
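
/* Illustration of the encoding used above (the constant 4 is just an
   example): a num_teams (4) clause fits into the signed 16-bit range and
   is therefore pushed as one pointer-sized element whose value is
   roughly

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
     | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   whereas a non-constant or out-of-range value is pushed as two
   elements: the identifier, with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set,
   followed by the value itself converted to a pointer. */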
12868
12869 /* Create an array of arguments that is then passed to GOMP_target. */
12870
12871 static tree
12872 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
12873 {
12874 auto_vec <tree, 6> args;
12875 tree clauses = gimple_omp_target_clauses (tgt_stmt);
12876 tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
12877 if (c)
12878 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
12879 else
12880 t = integer_minus_one_node;
12881 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
12882 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
12883
12884 c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
12885 if (c)
12886 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
12887 else
12888 t = integer_minus_one_node;
12889 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
12890 GOMP_TARGET_ARG_THREAD_LIMIT, t,
12891 &args);
12892
12893 /* Add HSA-specific grid sizes, if available. */
12894 if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
12895 OMP_CLAUSE__GRIDDIM_))
12896 {
12897 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true,
12898 GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES);
12899 args.quick_push (t);
12900 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
12901 }
12902
12903 /* Produce more, perhaps device specific, arguments here. */
12904
12905 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
12906 args.length () + 1),
12907 ".omp_target_args");
12908 for (unsigned i = 0; i < args.length (); i++)
12909 {
12910 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
12911 build_int_cst (integer_type_node, i),
12912 NULL_TREE, NULL_TREE);
12913 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
12914 GSI_SAME_STMT);
12915 }
12916 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
12917 build_int_cst (integer_type_node, args.length ()),
12918 NULL_TREE, NULL_TREE);
12919 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
12920 GSI_SAME_STMT);
12921 TREE_ADDRESSABLE (argarray) = 1;
12922 return build_fold_addr_expr (argarray);
12923 }
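
/* The resulting .omp_target_args array is therefore laid out roughly as

     { <num_teams argument>, <thread_limit argument>,
       [ <HSA attributes identifier>, <&__kernel_launch_attrs>, ]
       NULL }

   with the HSA pair present only for gridified kernels and a null
   pointer always terminating the array. */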
12924
12925 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
12926
12927 static void
12928 expand_omp_target (struct omp_region *region)
12929 {
12930 basic_block entry_bb, exit_bb, new_bb;
12931 struct function *child_cfun;
12932 tree child_fn, block, t;
12933 gimple_stmt_iterator gsi;
12934 gomp_target *entry_stmt;
12935 gimple *stmt;
12936 edge e;
12937 bool offloaded, data_region;
12938
12939 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
12940 new_bb = region->entry;
12941
12942 offloaded = is_gimple_omp_offloaded (entry_stmt);
12943 switch (gimple_omp_target_kind (entry_stmt))
12944 {
12945 case GF_OMP_TARGET_KIND_REGION:
12946 case GF_OMP_TARGET_KIND_UPDATE:
12947 case GF_OMP_TARGET_KIND_ENTER_DATA:
12948 case GF_OMP_TARGET_KIND_EXIT_DATA:
12949 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
12950 case GF_OMP_TARGET_KIND_OACC_KERNELS:
12951 case GF_OMP_TARGET_KIND_OACC_UPDATE:
12952 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
12953 case GF_OMP_TARGET_KIND_OACC_DECLARE:
12954 data_region = false;
12955 break;
12956 case GF_OMP_TARGET_KIND_DATA:
12957 case GF_OMP_TARGET_KIND_OACC_DATA:
12958 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
12959 data_region = true;
12960 break;
12961 default:
12962 gcc_unreachable ();
12963 }
12964
12965 child_fn = NULL_TREE;
12966 child_cfun = NULL;
12967 if (offloaded)
12968 {
12969 child_fn = gimple_omp_target_child_fn (entry_stmt);
12970 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
12971 }
12972
12973 /* Supported by expand_omp_taskreg, but not here. */
12974 if (child_cfun != NULL)
12975 gcc_checking_assert (!child_cfun->cfg);
12976 gcc_checking_assert (!gimple_in_ssa_p (cfun));
12977
12978 entry_bb = region->entry;
12979 exit_bb = region->exit;
12980
12981 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
12982 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
12983
12984 if (offloaded)
12985 {
12986 unsigned srcidx, dstidx, num;
12987
12988 /* If the offloading region needs data sent from the parent
12989 function, then the very first statement (except possible
12990 tree profile counter updates) of the offloading body
12991 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
12992 &.OMP_DATA_O is passed as an argument to the child function,
12993 we need to replace it with the argument as seen by the child
12994 function.
12995
12996 In most cases, this will end up being the identity assignment
12997 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
12998 a function call that has been inlined, the original PARM_DECL
12999 .OMP_DATA_I may have been converted into a different local
13000 variable. In which case, we need to keep the assignment. */
13001 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
13002 if (data_arg)
13003 {
13004 basic_block entry_succ_bb = single_succ (entry_bb);
13005 gimple_stmt_iterator gsi;
13006 tree arg;
13007 gimple *tgtcopy_stmt = NULL;
13008 tree sender = TREE_VEC_ELT (data_arg, 0);
13009
13010 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
13011 {
13012 gcc_assert (!gsi_end_p (gsi));
13013 stmt = gsi_stmt (gsi);
13014 if (gimple_code (stmt) != GIMPLE_ASSIGN)
13015 continue;
13016
13017 if (gimple_num_ops (stmt) == 2)
13018 {
13019 tree arg = gimple_assign_rhs1 (stmt);
13020
13021 /* We're ignoring the subcode because we're
13022 effectively doing a STRIP_NOPS. */
13023
13024 if (TREE_CODE (arg) == ADDR_EXPR
13025 && TREE_OPERAND (arg, 0) == sender)
13026 {
13027 tgtcopy_stmt = stmt;
13028 break;
13029 }
13030 }
13031 }
13032
13033 gcc_assert (tgtcopy_stmt != NULL);
13034 arg = DECL_ARGUMENTS (child_fn);
13035
13036 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
13037 gsi_remove (&gsi, true);
13038 }
13039
13040 /* Declare local variables needed in CHILD_CFUN. */
13041 block = DECL_INITIAL (child_fn);
13042 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
13043 /* The gimplifier could record temporaries in the offloading block
13044 rather than in the containing function's local_decls chain,
13045 which would mean cgraph missed finalizing them. Do it now. */
13046 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
13047 if (TREE_CODE (t) == VAR_DECL
13048 && TREE_STATIC (t)
13049 && !DECL_EXTERNAL (t))
13050 varpool_node::finalize_decl (t);
13051 DECL_SAVED_TREE (child_fn) = NULL;
13052 /* We'll create a CFG for child_fn, so no gimple body is needed. */
13053 gimple_set_body (child_fn, NULL);
13054 TREE_USED (block) = 1;
13055
13056 /* Reset DECL_CONTEXT on function arguments. */
13057 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
13058 DECL_CONTEXT (t) = child_fn;
13059
13060 /* Split ENTRY_BB at GIMPLE_*,
13061 so that it can be moved to the child function. */
13062 gsi = gsi_last_bb (entry_bb);
13063 stmt = gsi_stmt (gsi);
13064 gcc_assert (stmt
13065 && gimple_code (stmt) == gimple_code (entry_stmt));
13066 e = split_block (entry_bb, stmt);
13067 gsi_remove (&gsi, true);
13068 entry_bb = e->dest;
13069 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
13070
13071 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
13072 if (exit_bb)
13073 {
13074 gsi = gsi_last_bb (exit_bb);
13075 gcc_assert (!gsi_end_p (gsi)
13076 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
13077 stmt = gimple_build_return (NULL);
13078 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
13079 gsi_remove (&gsi, true);
13080 }
13081
13082 /* Move the offloading region into CHILD_CFUN. */
13083
13084 block = gimple_block (entry_stmt);
13085
13086 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
13087 if (exit_bb)
13088 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
13089 /* When the OMP expansion process cannot guarantee an up-to-date
13090 loop tree, arrange for the child function to fix up loops. */
13091 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
13092 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
13093
13094 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
13095 num = vec_safe_length (child_cfun->local_decls);
13096 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
13097 {
13098 t = (*child_cfun->local_decls)[srcidx];
13099 if (DECL_CONTEXT (t) == cfun->decl)
13100 continue;
13101 if (srcidx != dstidx)
13102 (*child_cfun->local_decls)[dstidx] = t;
13103 dstidx++;
13104 }
13105 if (dstidx != num)
13106 vec_safe_truncate (child_cfun->local_decls, dstidx);
13107
13108 /* Inform the callgraph about the new function. */
13109 child_cfun->curr_properties = cfun->curr_properties;
13110 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
13111 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
13112 cgraph_node *node = cgraph_node::get_create (child_fn);
13113 node->parallelized_function = 1;
13114 cgraph_node::add_new_function (child_fn, true);
13115
13116 /* Add the new function to the offload table. */
13117 if (ENABLE_OFFLOADING)
13118 vec_safe_push (offload_funcs, child_fn);
13119
13120 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
13121 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
13122
13123 /* Fix the callgraph edges for child_cfun. Those for cfun will be
13124 fixed in a following pass. */
13125 push_cfun (child_cfun);
13126 if (need_asm)
13127 assign_assembler_name_if_neeeded (child_fn);
13128 cgraph_edge::rebuild_edges ();
13129
13130 /* Some EH regions might become dead, see PR34608. If
13131 pass_cleanup_cfg isn't the first pass to happen with the
13132 new child, these dead EH edges might cause problems.
13133 Clean them up now. */
13134 if (flag_exceptions)
13135 {
13136 basic_block bb;
13137 bool changed = false;
13138
13139 FOR_EACH_BB_FN (bb, cfun)
13140 changed |= gimple_purge_dead_eh_edges (bb);
13141 if (changed)
13142 cleanup_tree_cfg ();
13143 }
13144 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
13145 verify_loop_structure ();
13146 pop_cfun ();
13147
13148 if (dump_file && !gimple_in_ssa_p (cfun))
13149 {
13150 omp_any_child_fn_dumped = true;
13151 dump_function_header (dump_file, child_fn, dump_flags);
13152 dump_function_to_file (child_fn, dump_file, dump_flags);
13153 }
13154 }
13155
13156 /* Emit a library call to launch the offloading region, or do data
13157 transfers. */
13158 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
13159 enum built_in_function start_ix;
13160 location_t clause_loc;
13161 unsigned int flags_i = 0;
13162 bool oacc_kernels_p = false;
13163
13164 switch (gimple_omp_target_kind (entry_stmt))
13165 {
13166 case GF_OMP_TARGET_KIND_REGION:
13167 start_ix = BUILT_IN_GOMP_TARGET;
13168 break;
13169 case GF_OMP_TARGET_KIND_DATA:
13170 start_ix = BUILT_IN_GOMP_TARGET_DATA;
13171 break;
13172 case GF_OMP_TARGET_KIND_UPDATE:
13173 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
13174 break;
13175 case GF_OMP_TARGET_KIND_ENTER_DATA:
13176 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
13177 break;
13178 case GF_OMP_TARGET_KIND_EXIT_DATA:
13179 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
13180 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
13181 break;
13182 case GF_OMP_TARGET_KIND_OACC_KERNELS:
13183 oacc_kernels_p = true;
13184 /* FALLTHROUGH */
13185 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
13186 start_ix = BUILT_IN_GOACC_PARALLEL;
13187 break;
13188 case GF_OMP_TARGET_KIND_OACC_DATA:
13189 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
13190 start_ix = BUILT_IN_GOACC_DATA_START;
13191 break;
13192 case GF_OMP_TARGET_KIND_OACC_UPDATE:
13193 start_ix = BUILT_IN_GOACC_UPDATE;
13194 break;
13195 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
13196 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
13197 break;
13198 case GF_OMP_TARGET_KIND_OACC_DECLARE:
13199 start_ix = BUILT_IN_GOACC_DECLARE;
13200 break;
13201 default:
13202 gcc_unreachable ();
13203 }
13204
13205 clauses = gimple_omp_target_clauses (entry_stmt);
13206
13207 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
13208 library choose) and there is no conditional. */
13209 cond = NULL_TREE;
13210 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
13211
13212 c = find_omp_clause (clauses, OMP_CLAUSE_IF);
13213 if (c)
13214 cond = OMP_CLAUSE_IF_EXPR (c);
13215
13216 c = find_omp_clause (clauses, OMP_CLAUSE_DEVICE);
13217 if (c)
13218 {
13219 /* Even if we pass it to all library function calls, it is currently only
13220 defined/used for the OpenMP target ones. */
13221 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
13222 || start_ix == BUILT_IN_GOMP_TARGET_DATA
13223 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
13224 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
13225
13226 device = OMP_CLAUSE_DEVICE_ID (c);
13227 clause_loc = OMP_CLAUSE_LOCATION (c);
13228 }
13229 else
13230 clause_loc = gimple_location (entry_stmt);
13231
13232 c = find_omp_clause (clauses, OMP_CLAUSE_NOWAIT);
13233 if (c)
13234 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
13235
13236 /* Ensure 'device' is of the correct type. */
13237 device = fold_convert_loc (clause_loc, integer_type_node, device);
13238
13239 /* If we found the clause 'if (cond)', build
13240 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
13241 if (cond)
13242 {
13243 cond = gimple_boolify (cond);
13244
13245 basic_block cond_bb, then_bb, else_bb;
13246 edge e;
13247 tree tmp_var;
13248
13249 tmp_var = create_tmp_var (TREE_TYPE (device));
13250 if (offloaded)
13251 e = split_block_after_labels (new_bb);
13252 else
13253 {
13254 gsi = gsi_last_bb (new_bb);
13255 gsi_prev (&gsi);
13256 e = split_block (new_bb, gsi_stmt (gsi));
13257 }
13258 cond_bb = e->src;
13259 new_bb = e->dest;
13260 remove_edge (e);
13261
13262 then_bb = create_empty_bb (cond_bb);
13263 else_bb = create_empty_bb (then_bb);
13264 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
13265 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
13266
13267 stmt = gimple_build_cond_empty (cond);
13268 gsi = gsi_last_bb (cond_bb);
13269 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
13270
13271 gsi = gsi_start_bb (then_bb);
13272 stmt = gimple_build_assign (tmp_var, device);
13273 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
13274
13275 gsi = gsi_start_bb (else_bb);
13276 stmt = gimple_build_assign (tmp_var,
13277 build_int_cst (integer_type_node,
13278 GOMP_DEVICE_HOST_FALLBACK));
13279 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
13280
13281 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
13282 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
13283 add_bb_to_loop (then_bb, cond_bb->loop_father);
13284 add_bb_to_loop (else_bb, cond_bb->loop_father);
13285 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
13286 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
13287
13288 device = tmp_var;
13289 }
13290
13291 gsi = gsi_last_bb (new_bb);
13292 t = gimple_omp_target_data_arg (entry_stmt);
13293 if (t == NULL)
13294 {
13295 t1 = size_zero_node;
13296 t2 = build_zero_cst (ptr_type_node);
13297 t3 = t2;
13298 t4 = t2;
13299 }
13300 else
13301 {
13302 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
13303 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
13304 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
13305 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
13306 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
13307 }
13308
13309 gimple *g;
13310 bool tagging = false;
13311 /* The maximum number of arguments used by any start_ix, without varargs. */
13312 auto_vec<tree, 11> args;
13313 args.quick_push (device);
13314 if (offloaded)
13315 args.quick_push (build_fold_addr_expr (child_fn));
13316 args.quick_push (t1);
13317 args.quick_push (t2);
13318 args.quick_push (t3);
13319 args.quick_push (t4);
13320 switch (start_ix)
13321 {
13322 case BUILT_IN_GOACC_DATA_START:
13323 case BUILT_IN_GOACC_DECLARE:
13324 case BUILT_IN_GOMP_TARGET_DATA:
13325 break;
13326 case BUILT_IN_GOMP_TARGET:
13327 case BUILT_IN_GOMP_TARGET_UPDATE:
13328 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
13329 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
13330 c = find_omp_clause (clauses, OMP_CLAUSE_DEPEND);
13331 if (c)
13332 depend = OMP_CLAUSE_DECL (c);
13333 else
13334 depend = build_int_cst (ptr_type_node, 0);
13335 args.quick_push (depend);
13336 if (start_ix == BUILT_IN_GOMP_TARGET)
13337 args.quick_push (get_target_arguments (&gsi, entry_stmt));
13338 break;
13339 case BUILT_IN_GOACC_PARALLEL:
13340 {
13341 set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
13342 tagging = true;
13343 }
13344 /* FALLTHRU */
13345 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
13346 case BUILT_IN_GOACC_UPDATE:
13347 {
13348 tree t_async = NULL_TREE;
13349
13350 /* If present, use the value specified by the respective
13351 clause, making sure it is of the correct type. */
13352 c = find_omp_clause (clauses, OMP_CLAUSE_ASYNC);
13353 if (c)
13354 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
13355 integer_type_node,
13356 OMP_CLAUSE_ASYNC_EXPR (c));
13357 else if (!tagging)
13358 /* Default values for t_async. */
13359 t_async = fold_convert_loc (gimple_location (entry_stmt),
13360 integer_type_node,
13361 build_int_cst (integer_type_node,
13362 GOMP_ASYNC_SYNC));
13363 if (tagging && t_async)
13364 {
13365 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
13366
13367 if (TREE_CODE (t_async) == INTEGER_CST)
13368 {
13369 /* See if we can pack the async arg in to the tag's
13370 operand. */
13371 i_async = TREE_INT_CST_LOW (t_async);
13372 if (i_async < GOMP_LAUNCH_OP_MAX)
13373 t_async = NULL_TREE;
13374 else
13375 i_async = GOMP_LAUNCH_OP_MAX;
13376 }
13377 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
13378 i_async));
13379 }
13380 if (t_async)
13381 args.safe_push (t_async);
13382
13383 /* Save the argument index, and ... */
13384 unsigned t_wait_idx = args.length ();
13385 unsigned num_waits = 0;
13386 c = find_omp_clause (clauses, OMP_CLAUSE_WAIT);
13387 if (!tagging || c)
13388 /* ... push a placeholder. */
13389 args.safe_push (integer_zero_node);
13390
13391 for (; c; c = OMP_CLAUSE_CHAIN (c))
13392 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
13393 {
13394 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
13395 integer_type_node,
13396 OMP_CLAUSE_WAIT_EXPR (c)));
13397 num_waits++;
13398 }
13399
13400 if (!tagging || num_waits)
13401 {
13402 tree len;
13403
13404 /* Now that we know the number, update the placeholder. */
13405 if (tagging)
13406 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
13407 else
13408 len = build_int_cst (integer_type_node, num_waits);
13409 len = fold_convert_loc (gimple_location (entry_stmt),
13410 unsigned_type_node, len);
13411 args[t_wait_idx] = len;
13412 }
13413 }
13414 break;
13415 default:
13416 gcc_unreachable ();
13417 }
13418 if (tagging)
13419 /* Push terminal marker - zero. */
13420 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
13421
13422 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
13423 gimple_set_location (g, gimple_location (entry_stmt));
13424 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
13425 if (!offloaded)
13426 {
13427 g = gsi_stmt (gsi);
13428 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
13429 gsi_remove (&gsi, true);
13430 }
13431 if (data_region && region->exit)
13432 {
13433 gsi = gsi_last_bb (region->exit);
13434 g = gsi_stmt (gsi);
13435 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
13436 gsi_remove (&gsi, true);
13437 }
13438 }
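
/* As a rough sketch of the result (names invented for the illustration):
   for an OpenACC parallel or kernels region the BUILT_IN_GOACC_PARALLEL
   entry point is called in place of the GIMPLE_OMP_TARGET, with roughly

     (device, child_fn, mapnum, hostaddrs, sizes, kinds,
      <launch tags: dims, async, waits...>, 0)

   as its argument list, while the OpenMP variants instead take the
   flags, depend and target-arguments operands pushed above and no
   trailing tag list. */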
13439
13440 /* Expand the KFOR loop as a GPGPU kernel, i.e. as a body only, with the
13441 iteration variable derived from the thread number. */
13442
13443 static void
13444 grid_expand_omp_for_loop (struct omp_region *kfor)
13445 {
13446 tree t, threadid;
13447 tree type, itype;
13448 gimple_stmt_iterator gsi;
13449 tree n1, step;
13450 struct omp_for_data fd;
13451
13452 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
13453 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
13454 == GF_OMP_FOR_KIND_GRID_LOOP);
13455 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
13456
13457 gcc_assert (gimple_omp_for_collapse (for_stmt) == 1);
13458 gcc_assert (kfor->cont);
13459 extract_omp_for_data (for_stmt, &fd, NULL);
13460
13461 itype = type = TREE_TYPE (fd.loop.v);
13462 if (POINTER_TYPE_P (type))
13463 itype = signed_type_for (type);
13464
13465 gsi = gsi_start_bb (body_bb);
13466
13467 n1 = fd.loop.n1;
13468 step = fd.loop.step;
13469 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
13470 true, NULL_TREE, true, GSI_SAME_STMT);
13471 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
13472 true, NULL_TREE, true, GSI_SAME_STMT);
13473 threadid = build_call_expr (builtin_decl_explicit
13474 (BUILT_IN_OMP_GET_THREAD_NUM), 0);
13475 threadid = fold_convert (itype, threadid);
13476 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
13477 true, GSI_SAME_STMT);
13478
13479 tree startvar = fd.loop.v;
13480 t = fold_build2 (MULT_EXPR, itype, threadid, step);
13481 if (POINTER_TYPE_P (type))
13482 t = fold_build_pointer_plus (n1, t);
13483 else
13484 t = fold_build2 (PLUS_EXPR, type, t, n1);
13485 t = fold_convert (type, t);
13486 t = force_gimple_operand_gsi (&gsi, t,
13487 DECL_P (startvar)
13488 && TREE_ADDRESSABLE (startvar),
13489 NULL_TREE, true, GSI_SAME_STMT);
13490 gassign *assign_stmt = gimple_build_assign (startvar, t);
13491 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
13492
13493 /* Remove the omp for statement. */
13494 gsi = gsi_last_bb (kfor->entry);
13495 gsi_remove (&gsi, true);
13496
13497 /* Remove the GIMPLE_OMP_CONTINUE statement. */
13498 gsi = gsi_last_bb (kfor->cont);
13499 gcc_assert (!gsi_end_p (gsi)
13500 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
13501 gsi_remove (&gsi, true);
13502
13503 /* Replace the GIMPLE_OMP_RETURN with a real return. */
13504 gsi = gsi_last_bb (kfor->exit);
13505 gcc_assert (!gsi_end_p (gsi)
13506 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
13507 gsi_remove (&gsi, true);
13508
13509 /* Fixup the much simpler CFG. */
13510 remove_edge (find_edge (kfor->cont, body_bb));
13511
13512 if (kfor->cont != body_bb)
13513 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
13514 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
13515 }
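
/* In other words, a gridified loop of the form

     for (V = N1; V cond N2; V += STEP)
       BODY;

   is reduced to the straight-line sequence

     V = N1 + omp_get_thread_num () * STEP;
     BODY;

   (with the appropriate conversions, and a pointer-plus for pointer
   types); all loop control is removed and each work item of the launched
   grid executes exactly one logical iteration. */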
13516
13517 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
13518 argument_decls. */
13519
13520 struct grid_arg_decl_map
13521 {
13522 tree old_arg;
13523 tree new_arg;
13524 };
13525
13526 /* Invoked through walk_gimple_op; remaps all PARM_DECLs to the ones
13527 pertaining to the kernel function. */
13528
13529 static tree
13530 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
13531 {
13532 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
13533 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
13534 tree t = *tp;
13535
13536 if (t == adm->old_arg)
13537 *tp = adm->new_arg;
13538 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
13539 return NULL_TREE;
13540 }
13541
13542 static void expand_omp (struct omp_region *region);
13543
13544 /* If the TARGET region contains a kernel body for-loop, remove its region
13545 from the TARGET and expand it in GPGPU kernel fashion. */
13546
13547 static void
13548 grid_expand_target_grid_body (struct omp_region *target)
13549 {
13550 if (!hsa_gen_requested_p ())
13551 return;
13552
13553 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
13554 struct omp_region **pp;
13555
13556 for (pp = &target->inner; *pp; pp = &(*pp)->next)
13557 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
13558 break;
13559
13560 struct omp_region *gpukernel = *pp;
13561
13562 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
13563 if (!gpukernel)
13564 {
13565 /* HSA cannot handle OACC stuff. */
13566 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
13567 return;
13568 gcc_checking_assert (orig_child_fndecl);
13569 gcc_assert (!find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
13570 OMP_CLAUSE__GRIDDIM_));
13571 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
13572
13573 hsa_register_kernel (n);
13574 return;
13575 }
13576
13577 gcc_assert (find_omp_clause (gimple_omp_target_clauses (tgt_stmt),
13578 OMP_CLAUSE__GRIDDIM_));
13579 tree inside_block = gimple_block (first_stmt (single_succ (gpukernel->entry)));
13580 *pp = gpukernel->next;
13581 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
13582 if ((*pp)->type == GIMPLE_OMP_FOR)
13583 break;
13584
13585 struct omp_region *kfor = *pp;
13586 gcc_assert (kfor);
13587 gcc_assert (gimple_omp_for_kind (last_stmt ((kfor)->entry))
13588 == GF_OMP_FOR_KIND_GRID_LOOP);
13589 *pp = kfor->next;
13590 if (kfor->inner)
13591 expand_omp (kfor->inner);
13592 if (gpukernel->inner)
13593 expand_omp (gpukernel->inner);
13594
13595 tree kern_fndecl = copy_node (orig_child_fndecl);
13596 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
13597 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
13598 tree tgtblock = gimple_block (tgt_stmt);
13599 tree fniniblock = make_node (BLOCK);
13600 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
13601 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
13602 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
13603 DECL_INITIAL (kern_fndecl) = fniniblock;
13604 push_struct_function (kern_fndecl);
13605 cfun->function_end_locus = gimple_location (tgt_stmt);
13606 pop_cfun ();
13607
13608 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
13609 gcc_assert (!DECL_CHAIN (old_parm_decl));
13610 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
13611 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
13612 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
13613 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
13614 kern_cfun->curr_properties = cfun->curr_properties;
13615
13616 remove_edge (BRANCH_EDGE (kfor->entry));
13617 grid_expand_omp_for_loop (kfor);
13618
13619 /* Remove the omp for statement. */
13620 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
13621 gsi_remove (&gsi, true);
13622 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
13623 return. */
13624 gsi = gsi_last_bb (gpukernel->exit);
13625 gcc_assert (!gsi_end_p (gsi)
13626 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
13627 gimple *ret_stmt = gimple_build_return (NULL);
13628 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
13629 gsi_remove (&gsi, true);
13630
13631 /* Statements in the first BB in the target construct have been produced by
13632 target lowering and must be copied inside the GPUKERNEL, with the two
13633 exceptions of the first OMP statement and the OMP_DATA assignment
13634 statement. */
13635 gsi = gsi_start_bb (single_succ (gpukernel->entry));
13636 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
13637 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
13638 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
13639 !gsi_end_p (tsi); gsi_next (&tsi))
13640 {
13641 gimple *stmt = gsi_stmt (tsi);
13642 if (is_gimple_omp (stmt))
13643 break;
13644 if (sender
13645 && is_gimple_assign (stmt)
13646 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
13647 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
13648 continue;
13649 gimple *copy = gimple_copy (stmt);
13650 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
13651 gimple_set_block (copy, fniniblock);
13652 }
13653
13654 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
13655 gpukernel->exit, inside_block);
13656
13657 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
13658 kcn->mark_force_output ();
13659 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
13660
13661 hsa_register_kernel (kcn, orig_child);
13662
13663 cgraph_node::add_new_function (kern_fndecl, true);
13664 push_cfun (kern_cfun);
13665 cgraph_edge::rebuild_edges ();
13666
13667 /* Re-map any mention of the PARM_DECL of the original function to the
13668 PARM_DECL of the new one.
13669
13670 TODO: It would be great if lowering produced references into the GPU
13671 kernel decl straight away and we did not have to do this. */
13672 struct grid_arg_decl_map adm;
13673 adm.old_arg = old_parm_decl;
13674 adm.new_arg = new_parm_decl;
13675 basic_block bb;
13676 FOR_EACH_BB_FN (bb, kern_cfun)
13677 {
13678 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
13679 {
13680 gimple *stmt = gsi_stmt (gsi);
13681 struct walk_stmt_info wi;
13682 memset (&wi, 0, sizeof (wi));
13683 wi.info = &adm;
13684 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
13685 }
13686 }
13687 pop_cfun ();
13688
13689 return;
13690 }
13691
13692 /* Expand the parallel region tree rooted at REGION. Expansion
13693 proceeds in depth-first order. Innermost regions are expanded
13694 first. This way, parallel regions that require a new function to
13695 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
13696 internal dependencies in their body. */
13697
13698 static void
13699 expand_omp (struct omp_region *region)
13700 {
13701 omp_any_child_fn_dumped = false;
13702 while (region)
13703 {
13704 location_t saved_location;
13705 gimple *inner_stmt = NULL;
13706
13707 /* First, determine whether this is a combined parallel+workshare
13708 region. */
13709 if (region->type == GIMPLE_OMP_PARALLEL)
13710 determine_parallel_type (region);
13711 else if (region->type == GIMPLE_OMP_TARGET)
13712 grid_expand_target_grid_body (region);
13713
13714 if (region->type == GIMPLE_OMP_FOR
13715 && gimple_omp_for_combined_p (last_stmt (region->entry)))
13716 inner_stmt = last_stmt (region->inner->entry);
13717
13718 if (region->inner)
13719 expand_omp (region->inner);
13720
13721 saved_location = input_location;
13722 if (gimple_has_location (last_stmt (region->entry)))
13723 input_location = gimple_location (last_stmt (region->entry));
13724
13725 switch (region->type)
13726 {
13727 case GIMPLE_OMP_PARALLEL:
13728 case GIMPLE_OMP_TASK:
13729 expand_omp_taskreg (region);
13730 break;
13731
13732 case GIMPLE_OMP_FOR:
13733 expand_omp_for (region, inner_stmt);
13734 break;
13735
13736 case GIMPLE_OMP_SECTIONS:
13737 expand_omp_sections (region);
13738 break;
13739
13740 case GIMPLE_OMP_SECTION:
13741 /* Individual omp sections are handled together with their
13742 parent GIMPLE_OMP_SECTIONS region. */
13743 break;
13744
13745 case GIMPLE_OMP_SINGLE:
13746 expand_omp_single (region);
13747 break;
13748
13749 case GIMPLE_OMP_ORDERED:
13750 {
13751 gomp_ordered *ord_stmt
13752 = as_a <gomp_ordered *> (last_stmt (region->entry));
13753 if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
13754 OMP_CLAUSE_DEPEND))
13755 {
13756 /* We'll expand these when expanding corresponding
13757 worksharing region with ordered(n) clause. */
13758 gcc_assert (region->outer
13759 && region->outer->type == GIMPLE_OMP_FOR);
13760 region->ord_stmt = ord_stmt;
13761 break;
13762 }
13763 }
13764 /* FALLTHRU */
13765 case GIMPLE_OMP_MASTER:
13766 case GIMPLE_OMP_TASKGROUP:
13767 case GIMPLE_OMP_CRITICAL:
13768 case GIMPLE_OMP_TEAMS:
13769 expand_omp_synch (region);
13770 break;
13771
13772 case GIMPLE_OMP_ATOMIC_LOAD:
13773 expand_omp_atomic (region);
13774 break;
13775
13776 case GIMPLE_OMP_TARGET:
13777 expand_omp_target (region);
13778 break;
13779
13780 default:
13781 gcc_unreachable ();
13782 }
13783
13784 input_location = saved_location;
13785 region = region->next;
13786 }
13787 if (omp_any_child_fn_dumped)
13788 {
13789 if (dump_file)
13790 dump_function_header (dump_file, current_function_decl, dump_flags);
13791 omp_any_child_fn_dumped = false;
13792 }
13793 }
13794
13795
13796 /* Helper for build_omp_regions. Scan the dominator tree starting at
13797 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
13798 true, the function ends once a single tree is built (otherwise, a whole
13799 forest of OMP constructs may be built). */
13800
13801 static void
13802 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
13803 bool single_tree)
13804 {
13805 gimple_stmt_iterator gsi;
13806 gimple *stmt;
13807 basic_block son;
13808
13809 gsi = gsi_last_bb (bb);
13810 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
13811 {
13812 struct omp_region *region;
13813 enum gimple_code code;
13814
13815 stmt = gsi_stmt (gsi);
13816 code = gimple_code (stmt);
13817 if (code == GIMPLE_OMP_RETURN)
13818 {
13819 /* STMT is the return point out of region PARENT. Mark it
13820 as the exit point and make PARENT the immediately
13821 enclosing region. */
13822 gcc_assert (parent);
13823 region = parent;
13824 region->exit = bb;
13825 parent = parent->outer;
13826 }
13827 else if (code == GIMPLE_OMP_ATOMIC_STORE)
13828 {
13829 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
13830 GIMPLE_OMP_RETURN, but matches with
13831 GIMPLE_OMP_ATOMIC_LOAD. */
13832 gcc_assert (parent);
13833 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
13834 region = parent;
13835 region->exit = bb;
13836 parent = parent->outer;
13837 }
13838 else if (code == GIMPLE_OMP_CONTINUE)
13839 {
13840 gcc_assert (parent);
13841 parent->cont = bb;
13842 }
13843 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
13844 {
13845 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
13846 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
13847 }
13848 else
13849 {
13850 region = new_omp_region (bb, code, parent);
13851 /* Otherwise... */
13852 if (code == GIMPLE_OMP_TARGET)
13853 {
13854 switch (gimple_omp_target_kind (stmt))
13855 {
13856 case GF_OMP_TARGET_KIND_REGION:
13857 case GF_OMP_TARGET_KIND_DATA:
13858 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
13859 case GF_OMP_TARGET_KIND_OACC_KERNELS:
13860 case GF_OMP_TARGET_KIND_OACC_DATA:
13861 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
13862 break;
13863 case GF_OMP_TARGET_KIND_UPDATE:
13864 case GF_OMP_TARGET_KIND_ENTER_DATA:
13865 case GF_OMP_TARGET_KIND_EXIT_DATA:
13866 case GF_OMP_TARGET_KIND_OACC_UPDATE:
13867 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
13868 case GF_OMP_TARGET_KIND_OACC_DECLARE:
13869 /* ..., other than for those stand-alone directives... */
13870 region = NULL;
13871 break;
13872 default:
13873 gcc_unreachable ();
13874 }
13875 }
13876 else if (code == GIMPLE_OMP_ORDERED
13877 && find_omp_clause (gimple_omp_ordered_clauses
13878 (as_a <gomp_ordered *> (stmt)),
13879 OMP_CLAUSE_DEPEND))
13880 /* #pragma omp ordered depend is also just a stand-alone
13881 directive. */
13882 region = NULL;
13883 /* ..., this directive becomes the parent for a new region. */
13884 if (region)
13885 parent = region;
13886 }
13887 }
13888
13889 if (single_tree && !parent)
13890 return;
13891
13892 for (son = first_dom_son (CDI_DOMINATORS, bb);
13893 son;
13894 son = next_dom_son (CDI_DOMINATORS, son))
13895 build_omp_regions_1 (son, parent, single_tree);
13896 }
13897
13898 /* Builds the tree of OMP regions rooted at ROOT, storing it to
13899 root_omp_region. */
13900
13901 static void
13902 build_omp_regions_root (basic_block root)
13903 {
13904 gcc_assert (root_omp_region == NULL);
13905 build_omp_regions_1 (root, NULL, true);
13906 gcc_assert (root_omp_region != NULL);
13907 }
13908
13909 /* Expand the omp construct (and its subconstructs) starting in HEAD. */
13910
13911 void
13912 omp_expand_local (basic_block head)
13913 {
13914 build_omp_regions_root (head);
13915 if (dump_file && (dump_flags & TDF_DETAILS))
13916 {
13917 fprintf (dump_file, "\nOMP region tree\n\n");
13918 dump_omp_region (dump_file, root_omp_region, 0);
13919 fprintf (dump_file, "\n");
13920 }
13921
13922 remove_exit_barriers (root_omp_region);
13923 expand_omp (root_omp_region);
13924
13925 free_omp_regions ();
13926 }
13927
13928 /* Scan the CFG and build a tree of OMP regions, storing the root of
13929 the OMP region tree in root_omp_region. */
13930
13931 static void
13932 build_omp_regions (void)
13933 {
13934 gcc_assert (root_omp_region == NULL);
13935 calculate_dominance_info (CDI_DOMINATORS);
13936 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
13937 }
13938
13939 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
13940
13941 static unsigned int
13942 execute_expand_omp (void)
13943 {
13944 build_omp_regions ();
13945
13946 if (!root_omp_region)
13947 return 0;
13948
13949 if (dump_file)
13950 {
13951 fprintf (dump_file, "\nOMP region tree\n\n");
13952 dump_omp_region (dump_file, root_omp_region, 0);
13953 fprintf (dump_file, "\n");
13954 }
13955
13956 remove_exit_barriers (root_omp_region);
13957
13958 expand_omp (root_omp_region);
13959
13960 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
13961 verify_loop_structure ();
13962 cleanup_tree_cfg ();
13963
13964 free_omp_regions ();
13965
13966 return 0;
13967 }
13968
13969 /* OMP expansion -- the default pass, run before creation of SSA form. */
13970
13971 namespace {
13972
13973 const pass_data pass_data_expand_omp =
13974 {
13975 GIMPLE_PASS, /* type */
13976 "ompexp", /* name */
13977 OPTGROUP_NONE, /* optinfo_flags */
13978 TV_NONE, /* tv_id */
13979 PROP_gimple_any, /* properties_required */
13980 PROP_gimple_eomp, /* properties_provided */
13981 0, /* properties_destroyed */
13982 0, /* todo_flags_start */
13983 0, /* todo_flags_finish */
13984 };
13985
13986 class pass_expand_omp : public gimple_opt_pass
13987 {
13988 public:
13989 pass_expand_omp (gcc::context *ctxt)
13990 : gimple_opt_pass (pass_data_expand_omp, ctxt)
13991 {}
13992
13993 /* opt_pass methods: */
13994 virtual unsigned int execute (function *)
13995 {
13996 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
13997 || flag_openmp_simd != 0)
13998 && !seen_error ());
13999
14000 /* This pass always runs, to provide PROP_gimple_eomp.
14001 But often, there is nothing to do. */
14002 if (!gate)
14003 return 0;
14004
14005 return execute_expand_omp ();
14006 }
14007
14008 }; // class pass_expand_omp
14009
14010 } // anon namespace
14011
14012 gimple_opt_pass *
14013 make_pass_expand_omp (gcc::context *ctxt)
14014 {
14015 return new pass_expand_omp (ctxt);
14016 }
14017
14018 namespace {
14019
14020 const pass_data pass_data_expand_omp_ssa =
14021 {
14022 GIMPLE_PASS, /* type */
14023 "ompexpssa", /* name */
14024 OPTGROUP_NONE, /* optinfo_flags */
14025 TV_NONE, /* tv_id */
14026 PROP_cfg | PROP_ssa, /* properties_required */
14027 PROP_gimple_eomp, /* properties_provided */
14028 0, /* properties_destroyed */
14029 0, /* todo_flags_start */
14030 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
14031 };
14032
14033 class pass_expand_omp_ssa : public gimple_opt_pass
14034 {
14035 public:
14036 pass_expand_omp_ssa (gcc::context *ctxt)
14037 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
14038 {}
14039
14040 /* opt_pass methods: */
14041 virtual bool gate (function *fun)
14042 {
14043 return !(fun->curr_properties & PROP_gimple_eomp);
14044 }
14045 virtual unsigned int execute (function *) { return execute_expand_omp (); }
14046 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
14047
14048 }; // class pass_expand_omp_ssa
14049
14050 } // anon namespace
14051
14052 gimple_opt_pass *
14053 make_pass_expand_omp_ssa (gcc::context *ctxt)
14054 {
14055 return new pass_expand_omp_ssa (ctxt);
14056 }
14057 \f
14058 /* Routines to lower OMP directives into OMP-GIMPLE. */
14059
14060 /* If ctx is a worksharing context inside of a cancellable parallel
14061 region and it isn't nowait, add a lhs to its GIMPLE_OMP_RETURN
14062 and a conditional branch to the parallel's cancel_label to handle
14063 cancellation in the implicit barrier. */
14064
14065 static void
14066 maybe_add_implicit_barrier_cancel (omp_context *ctx, gimple_seq *body)
14067 {
14068 gimple *omp_return = gimple_seq_last_stmt (*body);
14069 gcc_assert (gimple_code (omp_return) == GIMPLE_OMP_RETURN);
14070 if (gimple_omp_return_nowait_p (omp_return))
14071 return;
14072 if (ctx->outer
14073 && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_PARALLEL
14074 && ctx->outer->cancellable)
14075 {
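/* Give the GIMPLE_OMP_RETURN a LHS so that the implicit barrier becomes
   a cancellable one; when it reports that the parallel was cancelled,
   branch to the enclosing parallel's cancel_label. */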
14076 tree fndecl = builtin_decl_explicit (BUILT_IN_GOMP_CANCEL);
14077 tree c_bool_type = TREE_TYPE (TREE_TYPE (fndecl));
14078 tree lhs = create_tmp_var (c_bool_type);
14079 gimple_omp_return_set_lhs (omp_return, lhs);
14080 tree fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
14081 gimple *g = gimple_build_cond (NE_EXPR, lhs,
14082 fold_convert (c_bool_type,
14083 boolean_false_node),
14084 ctx->outer->cancel_label, fallthru_label);
14085 gimple_seq_add_stmt (body, g);
14086 gimple_seq_add_stmt (body, gimple_build_label (fallthru_label));
14087 }
14088 }
14089
14090 /* Lower the OpenMP sections directive in the current statement in GSI_P.
14091 CTX is the enclosing OMP context for the current statement. */
14092
14093 static void
14094 lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14095 {
14096 tree block, control;
14097 gimple_stmt_iterator tgsi;
14098 gomp_sections *stmt;
14099 gimple *t;
14100 gbind *new_stmt, *bind;
14101 gimple_seq ilist, dlist, olist, new_body;
14102
14103 stmt = as_a <gomp_sections *> (gsi_stmt (*gsi_p));
14104
14105 push_gimplify_context ();
14106
14107 dlist = NULL;
14108 ilist = NULL;
14109 lower_rec_input_clauses (gimple_omp_sections_clauses (stmt),
14110 &ilist, &dlist, ctx, NULL);
14111
14112 new_body = gimple_omp_body (stmt);
14113 gimple_omp_set_body (stmt, NULL);
14114 tgsi = gsi_start (new_body);
14115 for (; !gsi_end_p (tgsi); gsi_next (&tgsi))
14116 {
14117 omp_context *sctx;
14118 gimple *sec_start;
14119
14120 sec_start = gsi_stmt (tgsi);
14121 sctx = maybe_lookup_ctx (sec_start);
14122 gcc_assert (sctx);
14123
14124 lower_omp (gimple_omp_body_ptr (sec_start), sctx);
14125 gsi_insert_seq_after (&tgsi, gimple_omp_body (sec_start),
14126 GSI_CONTINUE_LINKING);
14127 gimple_omp_set_body (sec_start, NULL);
14128
14129 if (gsi_one_before_end_p (tgsi))
14130 {
14131 gimple_seq l = NULL;
14132 lower_lastprivate_clauses (gimple_omp_sections_clauses (stmt), NULL,
14133 &l, ctx);
14134 gsi_insert_seq_after (&tgsi, l, GSI_CONTINUE_LINKING);
14135 gimple_omp_section_set_last (sec_start);
14136 }
14137
14138 gsi_insert_after (&tgsi, gimple_build_omp_return (false),
14139 GSI_CONTINUE_LINKING);
14140 }
14141
14142 block = make_node (BLOCK);
14143 bind = gimple_build_bind (NULL, new_body, block);
14144
14145 olist = NULL;
14146 lower_reduction_clauses (gimple_omp_sections_clauses (stmt), &olist, ctx);
14147
14148 block = make_node (BLOCK);
14149 new_stmt = gimple_build_bind (NULL, NULL, block);
14150 gsi_replace (gsi_p, new_stmt, true);
14151
14152 pop_gimplify_context (new_stmt);
14153 gimple_bind_append_vars (new_stmt, ctx->block_vars);
14154 BLOCK_VARS (block) = gimple_bind_vars (bind);
14155 if (BLOCK_VARS (block))
14156 TREE_USED (block) = 1;
14157
14158 new_body = NULL;
14159 gimple_seq_add_seq (&new_body, ilist);
14160 gimple_seq_add_stmt (&new_body, stmt);
14161 gimple_seq_add_stmt (&new_body, gimple_build_omp_sections_switch ());
14162 gimple_seq_add_stmt (&new_body, bind);
14163
14164 control = create_tmp_var (unsigned_type_node, ".section");
14165 t = gimple_build_omp_continue (control, control);
14166 gimple_omp_sections_set_control (stmt, control);
14167 gimple_seq_add_stmt (&new_body, t);
14168
14169 gimple_seq_add_seq (&new_body, olist);
14170 if (ctx->cancellable)
14171 gimple_seq_add_stmt (&new_body, gimple_build_label (ctx->cancel_label));
14172 gimple_seq_add_seq (&new_body, dlist);
14173
14174 new_body = maybe_catch_exception (new_body);
14175
14176 t = gimple_build_omp_return
14177 (!!find_omp_clause (gimple_omp_sections_clauses (stmt),
14178 OMP_CLAUSE_NOWAIT));
14179 gimple_seq_add_stmt (&new_body, t);
14180 maybe_add_implicit_barrier_cancel (ctx, &new_body);
14181
14182 gimple_bind_set_body (new_stmt, new_body);
14183 }
14184
14185
14186 /* A subroutine of lower_omp_single. Expand the simple form of
14187 a GIMPLE_OMP_SINGLE, without a copyprivate clause:
14188
14189 if (GOMP_single_start ())
14190 BODY;
14191 [ GOMP_barrier (); ] -> unless 'nowait' is present.
14192
14193 FIXME. It may be better to delay expanding the logic of this until
14194 pass_expand_omp. The expanded logic may make the job more difficult
14195 for a synchronization analysis pass. */
14196
14197 static void
14198 lower_omp_single_simple (gomp_single *single_stmt, gimple_seq *pre_p)
14199 {
14200 location_t loc = gimple_location (single_stmt);
14201 tree tlabel = create_artificial_label (loc);
14202 tree flabel = create_artificial_label (loc);
14203 gimple *call, *cond;
14204 tree lhs, decl;
14205
14206 decl = builtin_decl_explicit (BUILT_IN_GOMP_SINGLE_START);
14207 lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (decl)));
14208 call = gimple_build_call (decl, 0);
14209 gimple_call_set_lhs (call, lhs);
14210 gimple_seq_add_stmt (pre_p, call);
14211
14212 cond = gimple_build_cond (EQ_EXPR, lhs,
14213 fold_convert_loc (loc, TREE_TYPE (lhs),
14214 boolean_true_node),
14215 tlabel, flabel);
14216 gimple_seq_add_stmt (pre_p, cond);
14217 gimple_seq_add_stmt (pre_p, gimple_build_label (tlabel));
14218 gimple_seq_add_seq (pre_p, gimple_omp_body (single_stmt));
14219 gimple_seq_add_stmt (pre_p, gimple_build_label (flabel));
14220 }
14221
14222
14223 /* A subroutine of lower_omp_single. Expand the simple form of
14224 a GIMPLE_OMP_SINGLE, with a copyprivate clause:
14225
14226 #pragma omp single copyprivate (a, b, c)
14227
14228 Create a new structure to hold copies of 'a', 'b' and 'c' and emit:
14229
14230 {
14231 if ((copyout_p = GOMP_single_copy_start ()) == NULL)
14232 {
14233 BODY;
14234 copyout.a = a;
14235 copyout.b = b;
14236 copyout.c = c;
14237 GOMP_single_copy_end (&copyout);
14238 }
14239 else
14240 {
14241 a = copyout_p->a;
14242 b = copyout_p->b;
14243 c = copyout_p->c;
14244 }
14245 GOMP_barrier ();
14246 }
14247
14248 FIXME. It may be better to delay expanding the logic of this until
14249 pass_expand_omp. The expanded logic may make the job more difficult
14250 for a synchronization analysis pass. */
14251
14252 static void
14253 lower_omp_single_copy (gomp_single *single_stmt, gimple_seq *pre_p,
14254 omp_context *ctx)
14255 {
14256 tree ptr_type, t, l0, l1, l2, bfn_decl;
14257 gimple_seq copyin_seq;
14258 location_t loc = gimple_location (single_stmt);
14259
14260 ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_copy_o");
14261
14262 ptr_type = build_pointer_type (ctx->record_type);
14263 ctx->receiver_decl = create_tmp_var (ptr_type, ".omp_copy_i");
14264
14265 l0 = create_artificial_label (loc);
14266 l1 = create_artificial_label (loc);
14267 l2 = create_artificial_label (loc);
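/* L0 is the path of the one thread that executes BODY and fills in the
   copyout structure, L1 is the copy-in path taken by the remaining
   threads, and L2 is the join point. */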
14268
14269 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SINGLE_COPY_START);
14270 t = build_call_expr_loc (loc, bfn_decl, 0);
14271 t = fold_convert_loc (loc, ptr_type, t);
14272 gimplify_assign (ctx->receiver_decl, t, pre_p);
14273
14274 t = build2 (EQ_EXPR, boolean_type_node, ctx->receiver_decl,
14275 build_int_cst (ptr_type, 0));
14276 t = build3 (COND_EXPR, void_type_node, t,
14277 build_and_jump (&l0), build_and_jump (&l1));
14278 gimplify_and_add (t, pre_p);
14279
14280 gimple_seq_add_stmt (pre_p, gimple_build_label (l0));
14281
14282 gimple_seq_add_seq (pre_p, gimple_omp_body (single_stmt));
14283
14284 copyin_seq = NULL;
14285 lower_copyprivate_clauses (gimple_omp_single_clauses (single_stmt), pre_p,
14286 &copyin_seq, ctx);
14287
14288 t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
14289 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SINGLE_COPY_END);
14290 t = build_call_expr_loc (loc, bfn_decl, 1, t);
14291 gimplify_and_add (t, pre_p);
14292
14293 t = build_and_jump (&l2);
14294 gimplify_and_add (t, pre_p);
14295
14296 gimple_seq_add_stmt (pre_p, gimple_build_label (l1));
14297
14298 gimple_seq_add_seq (pre_p, copyin_seq);
14299
14300 gimple_seq_add_stmt (pre_p, gimple_build_label (l2));
14301 }
14302
14303
14304 /* Lower code for an OpenMP single directive. */
14305
14306 static void
14307 lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14308 {
14309 tree block;
14310 gimple *t;
14311 gomp_single *single_stmt = as_a <gomp_single *> (gsi_stmt (*gsi_p));
14312 gbind *bind;
14313 gimple_seq bind_body, bind_body_tail = NULL, dlist;
14314
14315 push_gimplify_context ();
14316
14317 block = make_node (BLOCK);
14318 bind = gimple_build_bind (NULL, NULL, block);
14319 gsi_replace (gsi_p, bind, true);
14320 bind_body = NULL;
14321 dlist = NULL;
14322 lower_rec_input_clauses (gimple_omp_single_clauses (single_stmt),
14323 &bind_body, &dlist, ctx, NULL);
14324 lower_omp (gimple_omp_body_ptr (single_stmt), ctx);
14325
14326 gimple_seq_add_stmt (&bind_body, single_stmt);
14327
14328 if (ctx->record_type)
14329 lower_omp_single_copy (single_stmt, &bind_body, ctx);
14330 else
14331 lower_omp_single_simple (single_stmt, &bind_body);
14332
14333 gimple_omp_set_body (single_stmt, NULL);
14334
14335 gimple_seq_add_seq (&bind_body, dlist);
14336
14337 bind_body = maybe_catch_exception (bind_body);
14338
14339 t = gimple_build_omp_return
14340 (!!find_omp_clause (gimple_omp_single_clauses (single_stmt),
14341 OMP_CLAUSE_NOWAIT));
14342 gimple_seq_add_stmt (&bind_body_tail, t);
14343 maybe_add_implicit_barrier_cancel (ctx, &bind_body_tail);
14344 if (ctx->record_type)
14345 {
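/* The copyout buffer is dead once the single region has been left;
   clobber it so later passes can reuse its storage. */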
14346 gimple_stmt_iterator gsi = gsi_start (bind_body_tail);
14347 tree clobber = build_constructor (ctx->record_type, NULL);
14348 TREE_THIS_VOLATILE (clobber) = 1;
14349 gsi_insert_after (&gsi, gimple_build_assign (ctx->sender_decl,
14350 clobber), GSI_SAME_STMT);
14351 }
14352 gimple_seq_add_seq (&bind_body, bind_body_tail);
14353 gimple_bind_set_body (bind, bind_body);
14354
14355 pop_gimplify_context (bind);
14356
14357 gimple_bind_append_vars (bind, ctx->block_vars);
14358 BLOCK_VARS (block) = ctx->block_vars;
14359 if (BLOCK_VARS (block))
14360 TREE_USED (block) = 1;
14361 }
14362
14363
14364 /* Lower code for an OpenMP master directive. */
14365
14366 static void
14367 lower_omp_master (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14368 {
14369 tree block, lab = NULL, x, bfn_decl;
14370 gimple *stmt = gsi_stmt (*gsi_p);
14371 gbind *bind;
14372 location_t loc = gimple_location (stmt);
14373 gimple_seq tseq;
14374
14375 push_gimplify_context ();
14376
14377 block = make_node (BLOCK);
14378 bind = gimple_build_bind (NULL, NULL, block);
14379 gsi_replace (gsi_p, bind, true);
14380 gimple_bind_add_stmt (bind, stmt);
14381
14382 bfn_decl = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
14383 x = build_call_expr_loc (loc, bfn_decl, 0);
14384 x = build2 (EQ_EXPR, boolean_type_node, x, integer_zero_node);
14385 x = build3 (COND_EXPR, void_type_node, x, NULL, build_and_jump (&lab));
14386 tseq = NULL;
14387 gimplify_and_add (x, &tseq);
14388 gimple_bind_add_seq (bind, tseq);
14389
14390 lower_omp (gimple_omp_body_ptr (stmt), ctx);
14391 gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
14392 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
14393 gimple_omp_set_body (stmt, NULL);
14394
14395 gimple_bind_add_stmt (bind, gimple_build_label (lab));
14396
14397 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
14398
14399 pop_gimplify_context (bind);
14400
14401 gimple_bind_append_vars (bind, ctx->block_vars);
14402 BLOCK_VARS (block) = ctx->block_vars;
14403 }
14404
14405
14406 /* Lower code for an OpenMP taskgroup directive. */
14407
14408 static void
14409 lower_omp_taskgroup (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14410 {
14411 gimple *stmt = gsi_stmt (*gsi_p);
14412 gcall *x;
14413 gbind *bind;
14414 tree block = make_node (BLOCK);
14415
14416 bind = gimple_build_bind (NULL, NULL, block);
14417 gsi_replace (gsi_p, bind, true);
14418 gimple_bind_add_stmt (bind, stmt);
14419
14420 x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_TASKGROUP_START),
14421 0);
14422 gimple_bind_add_stmt (bind, x);
14423
14424 lower_omp (gimple_omp_body_ptr (stmt), ctx);
14425 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
14426 gimple_omp_set_body (stmt, NULL);
14427
14428 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
14429
14430 gimple_bind_append_vars (bind, ctx->block_vars);
14431 BLOCK_VARS (block) = ctx->block_vars;
14432 }
14433
14434
14435 /* Fold the OMP_ORDERED_CLAUSES for the OMP_ORDERED in ORD_STMT if possible. */
14436
14437 static void
14438 lower_omp_ordered_clauses (gimple_stmt_iterator *gsi_p, gomp_ordered *ord_stmt,
14439 omp_context *ctx)
14440 {
14441 struct omp_for_data fd;
14442 if (!ctx->outer || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR)
14443 return;
14444
14445 unsigned int len = gimple_omp_for_collapse (ctx->outer->stmt);
14446 struct omp_for_data_loop *loops = XALLOCAVEC (struct omp_for_data_loop, len);
14447 extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops);
14448 if (!fd.ordered)
14449 return;
14450
14451 tree *list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
14452 tree c = gimple_omp_ordered_clauses (ord_stmt);
14453 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
14454 && OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
14455 {
14456 /* Merge depend clauses from multiple adjacent
14457 #pragma omp ordered depend(sink:...) constructs
14458 into one #pragma omp ordered depend(sink:...), so that
14459 we can optimize them together. */
14460 gimple_stmt_iterator gsi = *gsi_p;
14461 gsi_next (&gsi);
14462 while (!gsi_end_p (gsi))
14463 {
14464 gimple *stmt = gsi_stmt (gsi);
14465 if (is_gimple_debug (stmt)
14466 || gimple_code (stmt) == GIMPLE_NOP)
14467 {
14468 gsi_next (&gsi);
14469 continue;
14470 }
14471 if (gimple_code (stmt) != GIMPLE_OMP_ORDERED)
14472 break;
14473 gomp_ordered *ord_stmt2 = as_a <gomp_ordered *> (stmt);
14474 c = gimple_omp_ordered_clauses (ord_stmt2);
14475 if (c == NULL_TREE
14476 || OMP_CLAUSE_CODE (c) != OMP_CLAUSE_DEPEND
14477 || OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
14478 break;
14479 while (*list_p)
14480 list_p = &OMP_CLAUSE_CHAIN (*list_p);
14481 *list_p = c;
14482 gsi_remove (&gsi, true);
14483 }
14484 }
14485
14486 /* Canonicalize sink dependence clauses into one folded clause if
14487 possible.
14488
14489 The basic algorithm is to create a sink vector whose first
14490 element is the GCD of all the first elements, and whose remaining
14491 elements are the minimum of the subsequent columns.
14492
14493 We ignore dependence vectors whose first element is zero because
14494 such dependencies are known to be executed by the same thread.
14495
14496 We take into account the direction of the loop, so a minimum
14497 becomes a maximum if the loop is iterating forwards. We also
14498 ignore sink clauses where the loop direction is unknown, or where
14499 the offsets are clearly invalid because they are not a multiple
14500 of the loop increment.
14501
14502 For example:
14503
14504 #pragma omp for ordered(2)
14505 for (i=0; i < N; ++i)
14506 for (j=0; j < M; ++j)
14507 {
14508 #pragma omp ordered \
14509 depend(sink:i-8,j-2) \
14510 depend(sink:i,j-1) \ // Completely ignored because i+0.
14511 depend(sink:i-4,j-3) \
14512 depend(sink:i-6,j-4)
14513 #pragma omp ordered depend(source)
14514 }
14515
14516 Folded clause is:
14517
14518 depend(sink:-gcd(8,4,6),-min(2,3,4))
14519 -or-
14520 depend(sink:-2,-2)
14521 */
14522
14523 /* FIXME: Computing GCDs where the first element is zero is
14524 non-trivial in the presence of collapsed loops. Do this later. */
14525 if (fd.collapse > 1)
14526 return;
14527
14528 wide_int *folded_deps = XALLOCAVEC (wide_int, 2 * len - 1);
14529 memset (folded_deps, 0, sizeof (*folded_deps) * (2 * len - 1));
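/* folded_deps[0 .. len-1] holds the folded vector built so far;
   folded_deps[len .. 2*len-2] temporarily holds the offsets of the
   clause currently being examined, for dimensions 1 .. len-1. */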
14530 tree folded_dep = NULL_TREE;
14531 /* TRUE if the first dimension's offset is negative. */
14532 bool neg_offset_p = false;
14533
14534 list_p = gimple_omp_ordered_clauses_ptr (ord_stmt);
14535 unsigned int i;
14536 while ((c = *list_p) != NULL)
14537 {
14538 bool remove = false;
14539
14540 gcc_assert (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND);
14541 if (OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_SINK)
14542 goto next_ordered_clause;
14543
14544 tree vec;
14545 for (vec = OMP_CLAUSE_DECL (c), i = 0;
14546 vec && TREE_CODE (vec) == TREE_LIST;
14547 vec = TREE_CHAIN (vec), ++i)
14548 {
14549 gcc_assert (i < len);
14550
14551 /* extract_omp_for_data has canonicalized the condition. */
14552 gcc_assert (fd.loops[i].cond_code == LT_EXPR
14553 || fd.loops[i].cond_code == GT_EXPR);
14554 bool forward = fd.loops[i].cond_code == LT_EXPR;
14555 bool maybe_lexically_later = true;
14556
14557 /* While the committee makes up its mind, bail if we have any
14558 non-constant steps. */
14559 if (TREE_CODE (fd.loops[i].step) != INTEGER_CST)
14560 goto lower_omp_ordered_ret;
14561
14562 tree itype = TREE_TYPE (TREE_VALUE (vec));
14563 if (POINTER_TYPE_P (itype))
14564 itype = sizetype;
14565 wide_int offset = wide_int::from (TREE_PURPOSE (vec),
14566 TYPE_PRECISION (itype),
14567 TYPE_SIGN (itype));
14568
14569 /* Ignore invalid offsets that are not multiples of the step. */
14570 if (!wi::multiple_of_p
14571 (wi::abs (offset), wi::abs ((wide_int) fd.loops[i].step),
14572 UNSIGNED))
14573 {
14574 warning_at (OMP_CLAUSE_LOCATION (c), 0,
14575 "ignoring sink clause with offset that is not "
14576 "a multiple of the loop step");
14577 remove = true;
14578 goto next_ordered_clause;
14579 }
14580
14581 /* Calculate the first dimension. The first dimension of
14582 the folded dependency vector is the GCD of the first
14583 elements, while ignoring any first elements whose offset
14584 is 0. */
14585 if (i == 0)
14586 {
14587 /* Ignore dependence vectors whose first dimension is 0. */
14588 if (offset == 0)
14589 {
14590 remove = true;
14591 goto next_ordered_clause;
14592 }
14593 else
14594 {
14595 if (!TYPE_UNSIGNED (itype) && (forward ^ wi::neg_p (offset)))
14596 {
14597 error_at (OMP_CLAUSE_LOCATION (c),
14598 "first offset must be in opposite direction "
14599 "of loop iterations");
14600 goto lower_omp_ordered_ret;
14601 }
14602 if (forward)
14603 offset = -offset;
14604 neg_offset_p = forward;
14605 /* Initialize the first time around. */
14606 if (folded_dep == NULL_TREE)
14607 {
14608 folded_dep = c;
14609 folded_deps[0] = offset;
14610 }
14611 else
14612 folded_deps[0] = wi::gcd (folded_deps[0],
14613 offset, UNSIGNED);
14614 }
14615 }
14616 /* Calculate minimum for the remaining dimensions. */
14617 else
14618 {
14619 folded_deps[len + i - 1] = offset;
14620 if (folded_dep == c)
14621 folded_deps[i] = offset;
14622 else if (maybe_lexically_later
14623 && !wi::eq_p (folded_deps[i], offset))
14624 {
14625 if (forward ^ wi::gts_p (folded_deps[i], offset))
14626 {
14627 unsigned int j;
14628 folded_dep = c;
14629 for (j = 1; j <= i; j++)
14630 folded_deps[j] = folded_deps[len + j - 1];
14631 }
14632 else
14633 maybe_lexically_later = false;
14634 }
14635 }
14636 }
14637 gcc_assert (i == len);
14638
14639 remove = true;
14640
14641 next_ordered_clause:
14642 if (remove)
14643 *list_p = OMP_CLAUSE_CHAIN (c);
14644 else
14645 list_p = &OMP_CLAUSE_CHAIN (c);
14646 }
14647
14648 if (folded_dep)
14649 {
14650 if (neg_offset_p)
14651 folded_deps[0] = -folded_deps[0];
14652
14653 tree itype = TREE_TYPE (TREE_VALUE (OMP_CLAUSE_DECL (folded_dep)));
14654 if (POINTER_TYPE_P (itype))
14655 itype = sizetype;
14656
14657 TREE_PURPOSE (OMP_CLAUSE_DECL (folded_dep))
14658 = wide_int_to_tree (itype, folded_deps[0]);
14659 OMP_CLAUSE_CHAIN (folded_dep) = gimple_omp_ordered_clauses (ord_stmt);
14660 *gimple_omp_ordered_clauses_ptr (ord_stmt) = folded_dep;
14661 }
14662
14663 lower_omp_ordered_ret:
14664
14665 /* Ordered without clauses is equivalent to #pragma omp ordered threads,
14666 while we want a nop instead if we remove all clauses. */
14667 if (gimple_omp_ordered_clauses (ord_stmt) == NULL_TREE)
14668 gsi_replace (gsi_p, gimple_build_nop (), true);
14669 }
14670
14671
14672 /* Lower code for an OpenMP ordered directive. */
14673
14674 static void
14675 lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14676 {
14677 tree block;
14678 gimple *stmt = gsi_stmt (*gsi_p);
14679 gomp_ordered *ord_stmt = as_a <gomp_ordered *> (stmt);
14680 gcall *x;
14681 gbind *bind;
14682 bool simd = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
14683 OMP_CLAUSE_SIMD);
14684 bool threads = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
14685 OMP_CLAUSE_THREADS);
14686
14687 if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt),
14688 OMP_CLAUSE_DEPEND))
14689 {
14690 /* FIXME: This needs to be moved to the expansion to verify various
14691 conditions only testable on cfg with dominators computed, and also
14692 all the depend clauses to be merged still might need to be available
14693 for the runtime checks. */
14694 if (0)
14695 lower_omp_ordered_clauses (gsi_p, ord_stmt, ctx);
14696 return;
14697 }
14698
14699 push_gimplify_context ();
14700
14701 block = make_node (BLOCK);
14702 bind = gimple_build_bind (NULL, NULL, block);
14703 gsi_replace (gsi_p, bind, true);
14704 gimple_bind_add_stmt (bind, stmt);
14705
14706 if (simd)
14707 {
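/* Within a simd region, mark the ordered section with internal
   functions that later passes consume; the argument records whether
   the 'threads' clause was also present. */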
14708 x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_START, 1,
14709 build_int_cst (NULL_TREE, threads));
14710 cfun->has_simduid_loops = true;
14711 }
14712 else
14713 x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_START),
14714 0);
14715 gimple_bind_add_stmt (bind, x);
14716
14717 lower_omp (gimple_omp_body_ptr (stmt), ctx);
14718 gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
14719 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
14720 gimple_omp_set_body (stmt, NULL);
14721
14722 if (simd)
14723 x = gimple_build_call_internal (IFN_GOMP_SIMD_ORDERED_END, 1,
14724 build_int_cst (NULL_TREE, threads));
14725 else
14726 x = gimple_build_call (builtin_decl_explicit (BUILT_IN_GOMP_ORDERED_END),
14727 0);
14728 gimple_bind_add_stmt (bind, x);
14729
14730 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
14731
14732 pop_gimplify_context (bind);
14733
14734 gimple_bind_append_vars (bind, ctx->block_vars);
14735 BLOCK_VARS (block) = gimple_bind_vars (bind);
14736 }
14737
14738
14739 /* Gimplify a GIMPLE_OMP_CRITICAL statement. This is a relatively simple
14740 substitution of a couple of function calls. But in the NAMED case, it
14741 requires that languages coordinate a symbol name. It is therefore
14742 best put here in common code. */
14743
14744 static GTY(()) hash_map<tree, tree> *critical_name_mutexes;
14745
14746 static void
14747 lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14748 {
14749 tree block;
14750 tree name, lock, unlock;
14751 gomp_critical *stmt = as_a <gomp_critical *> (gsi_stmt (*gsi_p));
14752 gbind *bind;
14753 location_t loc = gimple_location (stmt);
14754 gimple_seq tbody;
14755
14756 name = gimple_omp_critical_name (stmt);
14757 if (name)
14758 {
14759 tree decl;
14760
14761 if (!critical_name_mutexes)
14762 critical_name_mutexes = hash_map<tree, tree>::create_ggc (10);
14763
14764 tree *n = critical_name_mutexes->get (name);
14765 if (n == NULL)
14766 {
14767 char *new_str;
14768
14769 decl = create_tmp_var_raw (ptr_type_node);
14770
14771 new_str = ACONCAT ((".gomp_critical_user_",
14772 IDENTIFIER_POINTER (name), NULL));
14773 DECL_NAME (decl) = get_identifier (new_str);
14774 TREE_PUBLIC (decl) = 1;
14775 TREE_STATIC (decl) = 1;
14776 DECL_COMMON (decl) = 1;
14777 DECL_ARTIFICIAL (decl) = 1;
14778 DECL_IGNORED_P (decl) = 1;
14779
14780 varpool_node::finalize_decl (decl);
14781
14782 critical_name_mutexes->put (name, decl);
14783 }
14784 else
14785 decl = *n;
14786
14787 /* If '#pragma omp critical' is inside an offloaded region or
14788 inside a function marked as offloadable, the symbol must be
14789 marked as offloadable too. */
14790 omp_context *octx;
14791 if (cgraph_node::get (current_function_decl)->offloadable)
14792 varpool_node::get_create (decl)->offloadable = 1;
14793 else
14794 for (octx = ctx->outer; octx; octx = octx->outer)
14795 if (is_gimple_omp_offloaded (octx->stmt))
14796 {
14797 varpool_node::get_create (decl)->offloadable = 1;
14798 break;
14799 }
14800
14801 lock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_NAME_START);
14802 lock = build_call_expr_loc (loc, lock, 1, build_fold_addr_expr_loc (loc, decl));
14803
14804 unlock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_NAME_END);
14805 unlock = build_call_expr_loc (loc, unlock, 1,
14806 build_fold_addr_expr_loc (loc, decl));
14807 }
14808 else
14809 {
14810 lock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_START);
14811 lock = build_call_expr_loc (loc, lock, 0);
14812
14813 unlock = builtin_decl_explicit (BUILT_IN_GOMP_CRITICAL_END);
14814 unlock = build_call_expr_loc (loc, unlock, 0);
14815 }
14816
14817 push_gimplify_context ();
14818
14819 block = make_node (BLOCK);
14820 bind = gimple_build_bind (NULL, NULL, block);
14821 gsi_replace (gsi_p, bind, true);
14822 gimple_bind_add_stmt (bind, stmt);
14823
14824 tbody = gimple_bind_body (bind);
14825 gimplify_and_add (lock, &tbody);
14826 gimple_bind_set_body (bind, tbody);
14827
14828 lower_omp (gimple_omp_body_ptr (stmt), ctx);
14829 gimple_omp_set_body (stmt, maybe_catch_exception (gimple_omp_body (stmt)));
14830 gimple_bind_add_seq (bind, gimple_omp_body (stmt));
14831 gimple_omp_set_body (stmt, NULL);
14832
14833 tbody = gimple_bind_body (bind);
14834 gimplify_and_add (unlock, &tbody);
14835 gimple_bind_set_body (bind, tbody);
14836
14837 gimple_bind_add_stmt (bind, gimple_build_omp_return (true));
14838
14839 pop_gimplify_context (bind);
14840 gimple_bind_append_vars (bind, ctx->block_vars);
14841 BLOCK_VARS (block) = gimple_bind_vars (bind);
14842 }
14843
14844
14845 /* A subroutine of lower_omp_for. Generate code to emit the predicate
14846 for a lastprivate clause. Given a loop control predicate of (V
14847 cond N2), we gate the clause on (!(V cond N2)). The lowered form
14848 is appended to *DLIST, iterator initialization is appended to
14849 *BODY_P. */
14850
14851 static void
14852 lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
14853 gimple_seq *dlist, struct omp_context *ctx)
14854 {
14855 tree clauses, cond, vinit;
14856 enum tree_code cond_code;
14857 gimple_seq stmts;
14858
14859 cond_code = fd->loop.cond_code;
14860 cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR;
14861
14862 /* When possible, use a strict equality expression. This can let
14863 VRP-style optimizations deduce the value and remove a copy. */
14864 if (tree_fits_shwi_p (fd->loop.step))
14865 {
14866 HOST_WIDE_INT step = tree_to_shwi (fd->loop.step);
14867 if (step == 1 || step == -1)
14868 cond_code = EQ_EXPR;
14869 }
14870
14871 tree n2 = fd->loop.n2;
14872 if (fd->collapse > 1
14873 && TREE_CODE (n2) != INTEGER_CST
14874 && gimple_omp_for_combined_into_p (fd->for_stmt))
14875 {
14876 struct omp_context *taskreg_ctx = NULL;
14877 if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR)
14878 {
14879 gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt);
14880 if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR
14881 || gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_DISTRIBUTE)
14882 {
14883 if (gimple_omp_for_combined_into_p (gfor))
14884 {
14885 gcc_assert (ctx->outer->outer
14886 && is_parallel_ctx (ctx->outer->outer));
14887 taskreg_ctx = ctx->outer->outer;
14888 }
14889 else
14890 {
14891 struct omp_for_data outer_fd;
14892 extract_omp_for_data (gfor, &outer_fd, NULL);
14893 n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2);
14894 }
14895 }
14896 else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP)
14897 taskreg_ctx = ctx->outer->outer;
14898 }
14899 else if (is_taskreg_ctx (ctx->outer))
14900 taskreg_ctx = ctx->outer;
14901 if (taskreg_ctx)
14902 {
14903 int i;
14904 tree innerc
14905 = find_omp_clause (gimple_omp_taskreg_clauses (taskreg_ctx->stmt),
14906 OMP_CLAUSE__LOOPTEMP_);
14907 gcc_assert (innerc);
14908 for (i = 0; i < fd->collapse; i++)
14909 {
14910 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
14911 OMP_CLAUSE__LOOPTEMP_);
14912 gcc_assert (innerc);
14913 }
14914 innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc),
14915 OMP_CLAUSE__LOOPTEMP_);
14916 if (innerc)
14917 n2 = fold_convert (TREE_TYPE (n2),
14918 lookup_decl (OMP_CLAUSE_DECL (innerc),
14919 taskreg_ctx));
14920 }
14921 }
14922 cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2);
14923
14924 clauses = gimple_omp_for_clauses (fd->for_stmt);
14925 stmts = NULL;
14926 lower_lastprivate_clauses (clauses, cond, &stmts, ctx);
14927 if (!gimple_seq_empty_p (stmts))
14928 {
14929 gimple_seq_add_seq (&stmts, *dlist);
14930 *dlist = stmts;
14931
14932 /* Optimize: v = 0; is usually cheaper than v = some_other_constant. */
14933 vinit = fd->loop.n1;
14934 if (cond_code == EQ_EXPR
14935 && tree_fits_shwi_p (fd->loop.n2)
14936 && ! integer_zerop (fd->loop.n2))
14937 vinit = build_int_cst (TREE_TYPE (fd->loop.v), 0);
14938 else
14939 vinit = unshare_expr (vinit);
14940
14941 /* Initialize the iterator variable, so that threads that don't execute
14942 any iterations don't execute the lastprivate clauses by accident. */
14943 gimplify_assign (fd->loop.v, vinit, body_p);
14944 }
14945 }
14946
14947
14948 /* Lower code for an OMP loop directive. */
14949
14950 static void
14951 lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
14952 {
14953 tree *rhs_p, block;
14954 struct omp_for_data fd, *fdp = NULL;
14955 gomp_for *stmt = as_a <gomp_for *> (gsi_stmt (*gsi_p));
14956 gbind *new_stmt;
14957 gimple_seq omp_for_body, body, dlist;
14958 gimple_seq oacc_head = NULL, oacc_tail = NULL;
14959 size_t i;
14960
14961 push_gimplify_context ();
14962
14963 lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
14964
14965 block = make_node (BLOCK);
14966 new_stmt = gimple_build_bind (NULL, NULL, block);
14967 /* Replace at gsi right away, so that 'stmt' is no member
14968 of a sequence anymore as we're going to add to a different
14969 one below. */
14970 gsi_replace (gsi_p, new_stmt, true);
14971
14972 /* Move declaration of temporaries in the loop body before we make
14973 it go away. */
14974 omp_for_body = gimple_omp_body (stmt);
14975 if (!gimple_seq_empty_p (omp_for_body)
14976 && gimple_code (gimple_seq_first_stmt (omp_for_body)) == GIMPLE_BIND)
14977 {
14978 gbind *inner_bind
14979 = as_a <gbind *> (gimple_seq_first_stmt (omp_for_body));
14980 tree vars = gimple_bind_vars (inner_bind);
14981 gimple_bind_append_vars (new_stmt, vars);
14982 /* bind_vars/BLOCK_VARS are being moved to new_stmt/block, don't
14983 keep them on the inner_bind and its block. */
14984 gimple_bind_set_vars (inner_bind, NULL_TREE);
14985 if (gimple_bind_block (inner_bind))
14986 BLOCK_VARS (gimple_bind_block (inner_bind)) = NULL_TREE;
14987 }
14988
14989 if (gimple_omp_for_combined_into_p (stmt))
14990 {
14991 extract_omp_for_data (stmt, &fd, NULL);
14992 fdp = &fd;
14993
14994 /* We need two temporaries with fd.loop.v type (istart/iend)
14995 and then (fd.collapse - 1) temporaries with the same
14996 type for count2 ... countN-1 vars if not constant. */
14997 size_t count = 2;
14998 tree type = fd.iter_type;
14999 if (fd.collapse > 1
15000 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
15001 count += fd.collapse - 1;
15002 bool taskreg_for
15003 = (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR
15004 || gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_TASKLOOP);
15005 tree outerc = NULL, *pc = gimple_omp_for_clauses_ptr (stmt);
15006 tree clauses = *pc;
15007 if (taskreg_for)
15008 outerc
15009 = find_omp_clause (gimple_omp_taskreg_clauses (ctx->outer->stmt),
15010 OMP_CLAUSE__LOOPTEMP_);
15011 for (i = 0; i < count; i++)
15012 {
15013 tree temp;
15014 if (taskreg_for)
15015 {
15016 gcc_assert (outerc);
15017 temp = lookup_decl (OMP_CLAUSE_DECL (outerc), ctx->outer);
15018 outerc = find_omp_clause (OMP_CLAUSE_CHAIN (outerc),
15019 OMP_CLAUSE__LOOPTEMP_);
15020 }
15021 else
15022 {
15023 temp = create_tmp_var (type);
15024 insert_decl_map (&ctx->outer->cb, temp, temp);
15025 }
15026 *pc = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__LOOPTEMP_);
15027 OMP_CLAUSE_DECL (*pc) = temp;
15028 pc = &OMP_CLAUSE_CHAIN (*pc);
15029 }
15030 *pc = clauses;
15031 }
15032
15033 /* The pre-body and input clauses go before the lowered GIMPLE_OMP_FOR. */
15034 dlist = NULL;
15035 body = NULL;
15036 lower_rec_input_clauses (gimple_omp_for_clauses (stmt), &body, &dlist, ctx,
15037 fdp);
15038 gimple_seq_add_seq (&body, gimple_omp_for_pre_body (stmt));
15039
15040 lower_omp (gimple_omp_body_ptr (stmt), ctx);
15041
15042 /* Lower the header expressions. At this point, we can assume that
15043 the header is of the form:
15044
15045 #pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3)
15046
15047 We just need to make sure that VAL1, VAL2 and VAL3 are lowered
15048 using the .omp_data_s mapping, if needed. */
15049 for (i = 0; i < gimple_omp_for_collapse (stmt); i++)
15050 {
15051 rhs_p = gimple_omp_for_initial_ptr (stmt, i);
15052 if (!is_gimple_min_invariant (*rhs_p))
15053 *rhs_p = get_formal_tmp_var (*rhs_p, &body);
15054
15055 rhs_p = gimple_omp_for_final_ptr (stmt, i);
15056 if (!is_gimple_min_invariant (*rhs_p))
15057 *rhs_p = get_formal_tmp_var (*rhs_p, &body);
15058
15059 rhs_p = &TREE_OPERAND (gimple_omp_for_incr (stmt, i), 1);
15060 if (!is_gimple_min_invariant (*rhs_p))
15061 *rhs_p = get_formal_tmp_var (*rhs_p, &body);
15062 }
15063
15064 /* Once lowered, extract the bounds and clauses. */
15065 extract_omp_for_data (stmt, &fd, NULL);
15066
15067 if (is_gimple_omp_oacc (ctx->stmt)
15068 && !ctx_in_oacc_kernels_region (ctx))
15069 lower_oacc_head_tail (gimple_location (stmt),
15070 gimple_omp_for_clauses (stmt),
15071 &oacc_head, &oacc_tail, ctx);
15072
15073 /* Add OpenACC partitioning and reduction markers just before the loop. */
15074 if (oacc_head)
15075 gimple_seq_add_seq (&body, oacc_head);
15076
15077 lower_omp_for_lastprivate (&fd, &body, &dlist, ctx);
15078
15079 if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
15080 for (tree c = gimple_omp_for_clauses (stmt); c; c = OMP_CLAUSE_CHAIN (c))
15081 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
15082 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
15083 {
15084 OMP_CLAUSE_DECL (c) = lookup_decl (OMP_CLAUSE_DECL (c), ctx);
15085 if (DECL_P (OMP_CLAUSE_LINEAR_STEP (c)))
15086 OMP_CLAUSE_LINEAR_STEP (c)
15087 = maybe_lookup_decl_in_outer_ctx (OMP_CLAUSE_LINEAR_STEP (c),
15088 ctx);
15089 }
15090
15091 if (!gimple_omp_for_grid_phony (stmt))
15092 gimple_seq_add_stmt (&body, stmt);
15093 gimple_seq_add_seq (&body, gimple_omp_body (stmt));
15094
15095 if (!gimple_omp_for_grid_phony (stmt))
15096 gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v,
15097 fd.loop.v));
15098
15099 /* After the loop, add exit clauses. */
15100 lower_reduction_clauses (gimple_omp_for_clauses (stmt), &body, ctx);
15101
15102 if (ctx->cancellable)
15103 gimple_seq_add_stmt (&body, gimple_build_label (ctx->cancel_label));
15104
15105 gimple_seq_add_seq (&body, dlist);
15106
15107 body = maybe_catch_exception (body);
15108
15109 if (!gimple_omp_for_grid_phony (stmt))
15110 {
15111 /* Region exit marker goes at the end of the loop body. */
15112 gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait));
15113 maybe_add_implicit_barrier_cancel (ctx, &body);
15114 }
15115
15116 /* Add OpenACC joining and reduction markers just after the loop. */
15117 if (oacc_tail)
15118 gimple_seq_add_seq (&body, oacc_tail);
15119
15120 pop_gimplify_context (new_stmt);
15121
15122 gimple_bind_append_vars (new_stmt, ctx->block_vars);
15123 BLOCK_VARS (block) = gimple_bind_vars (new_stmt);
15124 if (BLOCK_VARS (block))
15125 TREE_USED (block) = 1;
15126
15127 gimple_bind_set_body (new_stmt, body);
15128 gimple_omp_set_body (stmt, NULL);
15129 gimple_omp_for_set_pre_body (stmt, NULL);
15130 }
15131
15132 /* Callback for walk_stmts. Check if the current statement only contains
15133 GIMPLE_OMP_FOR or GIMPLE_OMP_SECTIONS. */
15134
15135 static tree
15136 check_combined_parallel (gimple_stmt_iterator *gsi_p,
15137 bool *handled_ops_p,
15138 struct walk_stmt_info *wi)
15139 {
15140 int *info = (int *) wi->info;
15141 gimple *stmt = gsi_stmt (*gsi_p);
15142
15143 *handled_ops_p = true;
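/* *INFO ends up as 1 when the walked body contains exactly one
   GIMPLE_OMP_FOR or GIMPLE_OMP_SECTIONS, and as -1 otherwise. */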
15144 switch (gimple_code (stmt))
15145 {
15146 WALK_SUBSTMTS;
15147
15148 case GIMPLE_OMP_FOR:
15149 case GIMPLE_OMP_SECTIONS:
15150 *info = *info == 0 ? 1 : -1;
15151 break;
15152 default:
15153 *info = -1;
15154 break;
15155 }
15156 return NULL;
15157 }
15158
15159 struct omp_taskcopy_context
15160 {
15161 /* This field must be at the beginning, as we do "inheritance": Some
15162 callback functions for tree-inline.c (e.g., task_copyfn_copy_decl)
15163 receive a copy_body_data pointer that is up-casted to an
15164 omp_taskcopy_context pointer. */
15165 copy_body_data cb;
15166 omp_context *ctx;
15167 };
15168
15169 static tree
15170 task_copyfn_copy_decl (tree var, copy_body_data *cb)
15171 {
15172 struct omp_taskcopy_context *tcctx = (struct omp_taskcopy_context *) cb;
15173
15174 if (splay_tree_lookup (tcctx->ctx->sfield_map, (splay_tree_key) var))
15175 return create_tmp_var (TREE_TYPE (var));
15176
15177 return var;
15178 }
15179
15180 static tree
15181 task_copyfn_remap_type (struct omp_taskcopy_context *tcctx, tree orig_type)
15182 {
15183 tree name, new_fields = NULL, type, f;
15184
15185 type = lang_hooks.types.make_type (RECORD_TYPE);
15186 name = DECL_NAME (TYPE_NAME (orig_type));
15187 name = build_decl (gimple_location (tcctx->ctx->stmt),
15188 TYPE_DECL, name, type);
15189 TYPE_NAME (type) = name;
15190
15191 for (f = TYPE_FIELDS (orig_type); f ; f = TREE_CHAIN (f))
15192 {
15193 tree new_f = copy_node (f);
15194 DECL_CONTEXT (new_f) = type;
15195 TREE_TYPE (new_f) = remap_type (TREE_TYPE (f), &tcctx->cb);
15196 TREE_CHAIN (new_f) = new_fields;
15197 walk_tree (&DECL_SIZE (new_f), copy_tree_body_r, &tcctx->cb, NULL);
15198 walk_tree (&DECL_SIZE_UNIT (new_f), copy_tree_body_r, &tcctx->cb, NULL);
15199 walk_tree (&DECL_FIELD_OFFSET (new_f), copy_tree_body_r,
15200 &tcctx->cb, NULL);
15201 new_fields = new_f;
15202 tcctx->cb.decl_map->put (f, new_f);
15203 }
15204 TYPE_FIELDS (type) = nreverse (new_fields);
15205 layout_type (type);
15206 return type;
15207 }
15208
15209 /* Create task copyfn. */
15210
15211 static void
15212 create_task_copyfn (gomp_task *task_stmt, omp_context *ctx)
15213 {
15214 struct function *child_cfun;
15215 tree child_fn, t, c, src, dst, f, sf, arg, sarg, decl;
15216 tree record_type, srecord_type, bind, list;
15217 bool record_needs_remap = false, srecord_needs_remap = false;
15218 splay_tree_node n;
15219 struct omp_taskcopy_context tcctx;
15220 location_t loc = gimple_location (task_stmt);
15221
15222 child_fn = gimple_omp_task_copy_fn (task_stmt);
15223 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
15224 gcc_assert (child_cfun->cfg == NULL);
15225 DECL_SAVED_TREE (child_fn) = alloc_stmt_list ();
15226
15227 /* Reset DECL_CONTEXT on function arguments. */
15228 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
15229 DECL_CONTEXT (t) = child_fn;
15230
15231 /* Populate the function. */
15232 push_gimplify_context ();
15233 push_cfun (child_cfun);
15234
15235 bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL);
15236 TREE_SIDE_EFFECTS (bind) = 1;
15237 list = NULL;
15238 DECL_SAVED_TREE (child_fn) = bind;
15239 DECL_SOURCE_LOCATION (child_fn) = gimple_location (task_stmt);
15240
15241 /* Remap src and dst argument types if needed. */
15242 record_type = ctx->record_type;
15243 srecord_type = ctx->srecord_type;
15244 for (f = TYPE_FIELDS (record_type); f ; f = DECL_CHAIN (f))
15245 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
15246 {
15247 record_needs_remap = true;
15248 break;
15249 }
15250 for (f = TYPE_FIELDS (srecord_type); f ; f = DECL_CHAIN (f))
15251 if (variably_modified_type_p (TREE_TYPE (f), ctx->cb.src_fn))
15252 {
15253 srecord_needs_remap = true;
15254 break;
15255 }
15256
15257 if (record_needs_remap || srecord_needs_remap)
15258 {
15259 memset (&tcctx, '\0', sizeof (tcctx));
15260 tcctx.cb.src_fn = ctx->cb.src_fn;
15261 tcctx.cb.dst_fn = child_fn;
15262 tcctx.cb.src_node = cgraph_node::get (tcctx.cb.src_fn);
15263 gcc_checking_assert (tcctx.cb.src_node);
15264 tcctx.cb.dst_node = tcctx.cb.src_node;
15265 tcctx.cb.src_cfun = ctx->cb.src_cfun;
15266 tcctx.cb.copy_decl = task_copyfn_copy_decl;
15267 tcctx.cb.eh_lp_nr = 0;
15268 tcctx.cb.transform_call_graph_edges = CB_CGE_MOVE;
15269 tcctx.cb.decl_map = new hash_map<tree, tree>;
15270 tcctx.ctx = ctx;
15271
15272 if (record_needs_remap)
15273 record_type = task_copyfn_remap_type (&tcctx, record_type);
15274 if (srecord_needs_remap)
15275 srecord_type = task_copyfn_remap_type (&tcctx, srecord_type);
15276 }
15277 else
15278 tcctx.cb.decl_map = NULL;
15279
15280 arg = DECL_ARGUMENTS (child_fn);
15281 TREE_TYPE (arg) = build_pointer_type (record_type);
15282 sarg = DECL_CHAIN (arg);
15283 TREE_TYPE (sarg) = build_pointer_type (srecord_type);
15284
15285 /* First pass: initialize temporaries used in record_type and srecord_type
15286 sizes and field offsets. */
15287 if (tcctx.cb.decl_map)
15288 for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
15289 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
15290 {
15291 tree *p;
15292
15293 decl = OMP_CLAUSE_DECL (c);
15294 p = tcctx.cb.decl_map->get (decl);
15295 if (p == NULL)
15296 continue;
15297 n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
15298 sf = (tree) n->value;
15299 sf = *tcctx.cb.decl_map->get (sf);
15300 src = build_simple_mem_ref_loc (loc, sarg);
15301 src = omp_build_component_ref (src, sf);
15302 t = build2 (MODIFY_EXPR, TREE_TYPE (*p), *p, src);
15303 append_to_statement_list (t, &list);
15304 }
15305
15306 /* Second pass: copy shared var pointers and copy construct non-VLA
15307 firstprivate vars. */
15308 for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
15309 switch (OMP_CLAUSE_CODE (c))
15310 {
15311 splay_tree_key key;
15312 case OMP_CLAUSE_SHARED:
15313 decl = OMP_CLAUSE_DECL (c);
15314 key = (splay_tree_key) decl;
15315 if (OMP_CLAUSE_SHARED_FIRSTPRIVATE (c))
15316 key = (splay_tree_key) &DECL_UID (decl);
15317 n = splay_tree_lookup (ctx->field_map, key);
15318 if (n == NULL)
15319 break;
15320 f = (tree) n->value;
15321 if (tcctx.cb.decl_map)
15322 f = *tcctx.cb.decl_map->get (f);
15323 n = splay_tree_lookup (ctx->sfield_map, key);
15324 sf = (tree) n->value;
15325 if (tcctx.cb.decl_map)
15326 sf = *tcctx.cb.decl_map->get (sf);
15327 src = build_simple_mem_ref_loc (loc, sarg);
15328 src = omp_build_component_ref (src, sf);
15329 dst = build_simple_mem_ref_loc (loc, arg);
15330 dst = omp_build_component_ref (dst, f);
15331 t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, src);
15332 append_to_statement_list (t, &list);
15333 break;
15334 case OMP_CLAUSE_FIRSTPRIVATE:
15335 decl = OMP_CLAUSE_DECL (c);
15336 if (is_variable_sized (decl))
15337 break;
15338 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
15339 if (n == NULL)
15340 break;
15341 f = (tree) n->value;
15342 if (tcctx.cb.decl_map)
15343 f = *tcctx.cb.decl_map->get (f);
15344 n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
15345 if (n != NULL)
15346 {
15347 sf = (tree) n->value;
15348 if (tcctx.cb.decl_map)
15349 sf = *tcctx.cb.decl_map->get (sf);
15350 src = build_simple_mem_ref_loc (loc, sarg);
15351 src = omp_build_component_ref (src, sf);
15352 if (use_pointer_for_field (decl, NULL) || is_reference (decl))
15353 src = build_simple_mem_ref_loc (loc, src);
15354 }
15355 else
15356 src = decl;
15357 dst = build_simple_mem_ref_loc (loc, arg);
15358 dst = omp_build_component_ref (dst, f);
15359 t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
15360 append_to_statement_list (t, &list);
15361 break;
15362 case OMP_CLAUSE_PRIVATE:
15363 if (! OMP_CLAUSE_PRIVATE_OUTER_REF (c))
15364 break;
15365 decl = OMP_CLAUSE_DECL (c);
15366 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
15367 f = (tree) n->value;
15368 if (tcctx.cb.decl_map)
15369 f = *tcctx.cb.decl_map->get (f);
15370 n = splay_tree_lookup (ctx->sfield_map, (splay_tree_key) decl);
15371 if (n != NULL)
15372 {
15373 sf = (tree) n->value;
15374 if (tcctx.cb.decl_map)
15375 sf = *tcctx.cb.decl_map->get (sf);
15376 src = build_simple_mem_ref_loc (loc, sarg);
15377 src = omp_build_component_ref (src, sf);
15378 if (use_pointer_for_field (decl, NULL))
15379 src = build_simple_mem_ref_loc (loc, src);
15380 }
15381 else
15382 src = decl;
15383 dst = build_simple_mem_ref_loc (loc, arg);
15384 dst = omp_build_component_ref (dst, f);
15385 t = build2 (MODIFY_EXPR, TREE_TYPE (dst), dst, src);
15386 append_to_statement_list (t, &list);
15387 break;
15388 default:
15389 break;
15390 }
15391
15392 /* Last pass: handle VLA firstprivates. */
15393 if (tcctx.cb.decl_map)
15394 for (c = gimple_omp_task_clauses (task_stmt); c; c = OMP_CLAUSE_CHAIN (c))
15395 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
15396 {
15397 tree ind, ptr, df;
15398
15399 decl = OMP_CLAUSE_DECL (c);
15400 if (!is_variable_sized (decl))
15401 continue;
15402 n = splay_tree_lookup (ctx->field_map, (splay_tree_key) decl);
15403 if (n == NULL)
15404 continue;
15405 f = (tree) n->value;
15406 f = *tcctx.cb.decl_map->get (f);
15407 gcc_assert (DECL_HAS_VALUE_EXPR_P (decl));
15408 ind = DECL_VALUE_EXPR (decl);
15409 gcc_assert (TREE_CODE (ind) == INDIRECT_REF);
15410 gcc_assert (DECL_P (TREE_OPERAND (ind, 0)));
15411 n = splay_tree_lookup (ctx->sfield_map,
15412 (splay_tree_key) TREE_OPERAND (ind, 0));
15413 sf = (tree) n->value;
15414 sf = *tcctx.cb.decl_map->get (sf);
15415 src = build_simple_mem_ref_loc (loc, sarg);
15416 src = omp_build_component_ref (src, sf);
15417 src = build_simple_mem_ref_loc (loc, src);
15418 dst = build_simple_mem_ref_loc (loc, arg);
15419 dst = omp_build_component_ref (dst, f);
15420 t = lang_hooks.decls.omp_clause_copy_ctor (c, dst, src);
15421 append_to_statement_list (t, &list);
15422 n = splay_tree_lookup (ctx->field_map,
15423 (splay_tree_key) TREE_OPERAND (ind, 0));
15424 df = (tree) n->value;
15425 df = *tcctx.cb.decl_map->get (df);
15426 ptr = build_simple_mem_ref_loc (loc, arg);
15427 ptr = omp_build_component_ref (ptr, df);
15428 t = build2 (MODIFY_EXPR, TREE_TYPE (ptr), ptr,
15429 build_fold_addr_expr_loc (loc, dst));
15430 append_to_statement_list (t, &list);
15431 }
15432
15433 t = build1 (RETURN_EXPR, void_type_node, NULL);
15434 append_to_statement_list (t, &list);
15435
15436 if (tcctx.cb.decl_map)
15437 delete tcctx.cb.decl_map;
15438 pop_gimplify_context (NULL);
15439 BIND_EXPR_BODY (bind) = list;
15440 pop_cfun ();
15441 }
15442
15443 static void
15444 lower_depend_clauses (tree *pclauses, gimple_seq *iseq, gimple_seq *oseq)
15445 {
15446 tree c, clauses;
15447 gimple *g;
15448 size_t n_in = 0, n_out = 0, idx = 2, i;
15449
15450 clauses = find_omp_clause (*pclauses, OMP_CLAUSE_DEPEND);
15451 gcc_assert (clauses);
15452 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
15453 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND)
15454 switch (OMP_CLAUSE_DEPEND_KIND (c))
15455 {
15456 case OMP_CLAUSE_DEPEND_IN:
15457 n_in++;
15458 break;
15459 case OMP_CLAUSE_DEPEND_OUT:
15460 case OMP_CLAUSE_DEPEND_INOUT:
15461 n_out++;
15462 break;
15463 case OMP_CLAUSE_DEPEND_SOURCE:
15464 case OMP_CLAUSE_DEPEND_SINK:
15465 /* FALLTHRU */
15466 default:
15467 gcc_unreachable ();
15468 }
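/* Build the flat dependence array handed to the runtime: element 0
   holds the total number of dependences, element 1 the number of
   out/inout dependences; the out/inout addresses follow, then the
   in addresses. */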
15469 tree type = build_array_type_nelts (ptr_type_node, n_in + n_out + 2);
15470 tree array = create_tmp_var (type);
15471 TREE_ADDRESSABLE (array) = 1;
15472 tree r = build4 (ARRAY_REF, ptr_type_node, array, size_int (0), NULL_TREE,
15473 NULL_TREE);
15474 g = gimple_build_assign (r, build_int_cst (ptr_type_node, n_in + n_out));
15475 gimple_seq_add_stmt (iseq, g);
15476 r = build4 (ARRAY_REF, ptr_type_node, array, size_int (1), NULL_TREE,
15477 NULL_TREE);
15478 g = gimple_build_assign (r, build_int_cst (ptr_type_node, n_out));
15479 gimple_seq_add_stmt (iseq, g);
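/* Emit the out/inout addresses on the first iteration (i == 0) and
   the in addresses on the second (i == 1), starting at index 2. */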
15480 for (i = 0; i < 2; i++)
15481 {
15482 if ((i ? n_in : n_out) == 0)
15483 continue;
15484 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
15485 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND
15486 && ((OMP_CLAUSE_DEPEND_KIND (c) != OMP_CLAUSE_DEPEND_IN) ^ i))
15487 {
15488 tree t = OMP_CLAUSE_DECL (c);
15489 t = fold_convert (ptr_type_node, t);
15490 gimplify_expr (&t, iseq, NULL, is_gimple_val, fb_rvalue);
15491 r = build4 (ARRAY_REF, ptr_type_node, array, size_int (idx++),
15492 NULL_TREE, NULL_TREE);
15493 g = gimple_build_assign (r, t);
15494 gimple_seq_add_stmt (iseq, g);
15495 }
15496 }
15497 c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_DEPEND);
15498 OMP_CLAUSE_DECL (c) = build_fold_addr_expr (array);
15499 OMP_CLAUSE_CHAIN (c) = *pclauses;
15500 *pclauses = c;
15501 tree clobber = build_constructor (type, NULL);
15502 TREE_THIS_VOLATILE (clobber) = 1;
15503 g = gimple_build_assign (array, clobber);
15504 gimple_seq_add_stmt (oseq, g);
15505 }
15506
15507 /* Lower the OpenMP parallel or task directive in the current statement
15508 in GSI_P. CTX holds context information for the directive. */
15509
15510 static void
15511 lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
15512 {
15513 tree clauses;
15514 tree child_fn, t;
15515 gimple *stmt = gsi_stmt (*gsi_p);
15516 gbind *par_bind, *bind, *dep_bind = NULL;
15517 gimple_seq par_body, olist, ilist, par_olist, par_rlist, par_ilist, new_body;
15518 location_t loc = gimple_location (stmt);
15519
15520 clauses = gimple_omp_taskreg_clauses (stmt);
15521 par_bind
15522 = as_a <gbind *> (gimple_seq_first_stmt (gimple_omp_body (stmt)));
15523 par_body = gimple_bind_body (par_bind);
15524 child_fn = ctx->cb.dst_fn;
15525 if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
15526 && !gimple_omp_parallel_combined_p (stmt))
15527 {
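/* If the parallel body consists of exactly one workshare construct,
   mark the parallel as combined so that expansion can use the combined
   parallel + workshare runtime entry points. */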
15528 struct walk_stmt_info wi;
15529 int ws_num = 0;
15530
15531 memset (&wi, 0, sizeof (wi));
15532 wi.info = &ws_num;
15533 wi.val_only = true;
15534 walk_gimple_seq (par_body, check_combined_parallel, NULL, &wi);
15535 if (ws_num == 1)
15536 gimple_omp_parallel_set_combined_p (stmt, true);
15537 }
15538 gimple_seq dep_ilist = NULL;
15539 gimple_seq dep_olist = NULL;
15540 if (gimple_code (stmt) == GIMPLE_OMP_TASK
15541 && find_omp_clause (clauses, OMP_CLAUSE_DEPEND))
15542 {
15543 push_gimplify_context ();
15544 dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
15545 lower_depend_clauses (gimple_omp_task_clauses_ptr (stmt),
15546 &dep_ilist, &dep_olist);
15547 }
15548
15549 if (ctx->srecord_type)
15550 create_task_copyfn (as_a <gomp_task *> (stmt), ctx);
15551
15552 push_gimplify_context ();
15553
15554 par_olist = NULL;
15555 par_ilist = NULL;
15556 par_rlist = NULL;
15557 bool phony_construct = gimple_code (stmt) == GIMPLE_OMP_PARALLEL
15558 && gimple_omp_parallel_grid_phony (as_a <gomp_parallel *> (stmt));
15559 if (phony_construct && ctx->record_type)
15560 {
15561 gcc_checking_assert (!ctx->receiver_decl);
15562 ctx->receiver_decl = create_tmp_var
15563 (build_reference_type (ctx->record_type), ".omp_rec");
15564 }
15565 lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx, NULL);
15566 lower_omp (&par_body, ctx);
15567 if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL)
15568 lower_reduction_clauses (clauses, &par_rlist, ctx);
15569
15570 /* Declare all the variables created by mapping and the variables
15571 declared in the scope of the parallel body. */
15572 record_vars_into (ctx->block_vars, child_fn);
15573 record_vars_into (gimple_bind_vars (par_bind), child_fn);
15574
15575 if (ctx->record_type)
15576 {
15577 ctx->sender_decl
15578 = create_tmp_var (ctx->srecord_type ? ctx->srecord_type
15579 : ctx->record_type, ".omp_data_o");
15580 DECL_NAMELESS (ctx->sender_decl) = 1;
15581 TREE_ADDRESSABLE (ctx->sender_decl) = 1;
15582 gimple_omp_taskreg_set_data_arg (stmt, ctx->sender_decl);
15583 }
15584
15585 olist = NULL;
15586 ilist = NULL;
15587 lower_send_clauses (clauses, &ilist, &olist, ctx);
15588 lower_send_shared_vars (&ilist, &olist, ctx);
15589
15590 if (ctx->record_type)
15591 {
15592 tree clobber = build_constructor (TREE_TYPE (ctx->sender_decl), NULL);
15593 TREE_THIS_VOLATILE (clobber) = 1;
15594 gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
15595 clobber));
15596 }
15597
15598 /* Once all the expansions are done, sequence all the different
15599 fragments inside gimple_omp_body. */
15600
15601 new_body = NULL;
15602
15603 if (ctx->record_type)
15604 {
15605 t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
15606 /* fixup_child_record_type might have changed receiver_decl's type. */
15607 t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t);
15608 gimple_seq_add_stmt (&new_body,
15609 gimple_build_assign (ctx->receiver_decl, t));
15610 }
15611
15612 gimple_seq_add_seq (&new_body, par_ilist);
15613 gimple_seq_add_seq (&new_body, par_body);
15614 gimple_seq_add_seq (&new_body, par_rlist);
15615 if (ctx->cancellable)
15616 gimple_seq_add_stmt (&new_body, gimple_build_label (ctx->cancel_label));
15617 gimple_seq_add_seq (&new_body, par_olist);
15618 new_body = maybe_catch_exception (new_body);
15619 if (gimple_code (stmt) == GIMPLE_OMP_TASK)
15620 gimple_seq_add_stmt (&new_body,
15621 gimple_build_omp_continue (integer_zero_node,
15622 integer_zero_node));
15623 if (!phony_construct)
15624 {
15625 gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
15626 gimple_omp_set_body (stmt, new_body);
15627 }
15628
15629 bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind));
15630 gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true);
15631 gimple_bind_add_seq (bind, ilist);
15632 if (!phony_construct)
15633 gimple_bind_add_stmt (bind, stmt);
15634 else
15635 gimple_bind_add_seq (bind, new_body);
15636 gimple_bind_add_seq (bind, olist);
15637
15638 pop_gimplify_context (NULL);
15639
15640 if (dep_bind)
15641 {
15642 gimple_bind_add_seq (dep_bind, dep_ilist);
15643 gimple_bind_add_stmt (dep_bind, bind);
15644 gimple_bind_add_seq (dep_bind, dep_olist);
15645 pop_gimplify_context (dep_bind);
15646 }
15647 }
15648
15649 /* Lower the GIMPLE_OMP_TARGET in the current statement
15650 in GSI_P. CTX holds context information for the directive. */
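/* This handles both offloaded regions (OpenMP target and OpenACC
   parallel/kernels) and plain data regions.  For offloaded regions the
   mapped objects are marshalled to the runtime through three parallel
   arrays -- addresses, sizes and map kinds -- built further below.  */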
15651
15652 static void
15653 lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
15654 {
15655 tree clauses;
15656 tree child_fn, t, c;
15657 gomp_target *stmt = as_a <gomp_target *> (gsi_stmt (*gsi_p));
15658 gbind *tgt_bind, *bind, *dep_bind = NULL;
15659 gimple_seq tgt_body, olist, ilist, fplist, new_body;
15660 location_t loc = gimple_location (stmt);
15661 bool offloaded, data_region;
15662 unsigned int map_cnt = 0;
15663 bool has_depend = false;
15664
15665 offloaded = is_gimple_omp_offloaded (stmt);
15666 switch (gimple_omp_target_kind (stmt))
15667 {
15668 case GF_OMP_TARGET_KIND_REGION:
15669 case GF_OMP_TARGET_KIND_UPDATE:
15670 case GF_OMP_TARGET_KIND_ENTER_DATA:
15671 case GF_OMP_TARGET_KIND_EXIT_DATA:
15672 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
15673 case GF_OMP_TARGET_KIND_OACC_KERNELS:
15674 case GF_OMP_TARGET_KIND_OACC_UPDATE:
15675 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
15676 case GF_OMP_TARGET_KIND_OACC_DECLARE:
15677 data_region = false;
15678 break;
15679 case GF_OMP_TARGET_KIND_DATA:
15680 case GF_OMP_TARGET_KIND_OACC_DATA:
15681 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
15682 data_region = true;
15683 break;
15684 default:
15685 gcc_unreachable ();
15686 }
15687
15688 clauses = gimple_omp_target_clauses (stmt);
15689
15690 gimple_seq dep_ilist = NULL;
15691 gimple_seq dep_olist = NULL;
15692 if (find_omp_clause (clauses, OMP_CLAUSE_DEPEND))
15693 {
15694 push_gimplify_context ();
15695 dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
15696 lower_depend_clauses (gimple_omp_target_clauses_ptr (stmt),
15697 &dep_ilist, &dep_olist);
15698 has_depend = true;
15699 }
15700
15701 tgt_bind = NULL;
15702 tgt_body = NULL;
15703 if (offloaded)
15704 {
15705 tgt_bind = gimple_seq_first_stmt_as_a_bind (gimple_omp_body (stmt));
15706 tgt_body = gimple_bind_body (tgt_bind);
15707 }
15708 else if (data_region)
15709 tgt_body = gimple_omp_body (stmt);
15710 child_fn = ctx->cb.dst_fn;
15711
15712 push_gimplify_context ();
15713 fplist = NULL;
15714
15715 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
15716 switch (OMP_CLAUSE_CODE (c))
15717 {
15718 tree var, x;
15719
15720 default:
15721 break;
15722 case OMP_CLAUSE_MAP:
15723 #if CHECKING_P
15724 /* First check what we're prepared to handle in the following. */
15725 switch (OMP_CLAUSE_MAP_KIND (c))
15726 {
15727 case GOMP_MAP_ALLOC:
15728 case GOMP_MAP_TO:
15729 case GOMP_MAP_FROM:
15730 case GOMP_MAP_TOFROM:
15731 case GOMP_MAP_POINTER:
15732 case GOMP_MAP_TO_PSET:
15733 case GOMP_MAP_FORCE_DEALLOC:
15734 case GOMP_MAP_RELEASE:
15735 case GOMP_MAP_ALWAYS_TO:
15736 case GOMP_MAP_ALWAYS_FROM:
15737 case GOMP_MAP_ALWAYS_TOFROM:
15738 case GOMP_MAP_FIRSTPRIVATE_POINTER:
15739 case GOMP_MAP_FIRSTPRIVATE_REFERENCE:
15740 case GOMP_MAP_STRUCT:
15741 case GOMP_MAP_ALWAYS_POINTER:
15742 break;
15743 case GOMP_MAP_FORCE_ALLOC:
15744 case GOMP_MAP_FORCE_TO:
15745 case GOMP_MAP_FORCE_FROM:
15746 case GOMP_MAP_FORCE_TOFROM:
15747 case GOMP_MAP_FORCE_PRESENT:
15748 case GOMP_MAP_FORCE_DEVICEPTR:
15749 case GOMP_MAP_DEVICE_RESIDENT:
15750 case GOMP_MAP_LINK:
15751 gcc_assert (is_gimple_omp_oacc (stmt));
15752 break;
15753 default:
15754 gcc_unreachable ();
15755 }
15756 #endif
15757 /* FALLTHRU */
15758 case OMP_CLAUSE_TO:
15759 case OMP_CLAUSE_FROM:
15760 oacc_firstprivate:
15761 var = OMP_CLAUSE_DECL (c);
15762 if (!DECL_P (var))
15763 {
15764 if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP
15765 || (!OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
15766 && (OMP_CLAUSE_MAP_KIND (c)
15767 != GOMP_MAP_FIRSTPRIVATE_POINTER)))
15768 map_cnt++;
15769 continue;
15770 }
15771
15772 if (DECL_SIZE (var)
15773 && TREE_CODE (DECL_SIZE (var)) != INTEGER_CST)
15774 {
15775 tree var2 = DECL_VALUE_EXPR (var);
15776 gcc_assert (TREE_CODE (var2) == INDIRECT_REF);
15777 var2 = TREE_OPERAND (var2, 0);
15778 gcc_assert (DECL_P (var2));
15779 var = var2;
15780 }
15781
15782 if (offloaded
15783 && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
15784 && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
15785 || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE))
15786 {
15787 if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
15788 {
15789 if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx))
15790 && varpool_node::get_create (var)->offloadable)
15791 continue;
15792
15793 tree type = build_pointer_type (TREE_TYPE (var));
15794 tree new_var = lookup_decl (var, ctx);
15795 x = create_tmp_var_raw (type, get_name (new_var));
15796 gimple_add_tmp_var (x);
15797 x = build_simple_mem_ref (x);
15798 SET_DECL_VALUE_EXPR (new_var, x);
15799 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15800 }
15801 continue;
15802 }
15803
15804 if (!maybe_lookup_field (var, ctx))
15805 continue;
15806
15807 if (offloaded)
15808 {
15809 x = build_receiver_ref (var, true, ctx);
15810 tree new_var = lookup_decl (var, ctx);
15811
15812 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
15813 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
15814 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
15815 && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
15816 x = build_simple_mem_ref (x);
15817 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
15818 {
15819 gcc_assert (is_gimple_omp_oacc (ctx->stmt));
15820 if (is_reference (new_var))
15821 {
15822 /* Create a local object to hold the instance
15823 value. */
15824 tree type = TREE_TYPE (TREE_TYPE (new_var));
15825 const char *id = IDENTIFIER_POINTER (DECL_NAME (new_var));
15826 tree inst = create_tmp_var (type, id);
15827 gimplify_assign (inst, fold_indirect_ref (x), &fplist);
15828 x = build_fold_addr_expr (inst);
15829 }
15830 gimplify_assign (new_var, x, &fplist);
15831 }
15832 else if (DECL_P (new_var))
15833 {
15834 SET_DECL_VALUE_EXPR (new_var, x);
15835 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15836 }
15837 else
15838 gcc_unreachable ();
15839 }
15840 map_cnt++;
15841 break;
15842
15843 case OMP_CLAUSE_FIRSTPRIVATE:
15844 if (is_oacc_parallel (ctx))
15845 goto oacc_firstprivate;
15846 map_cnt++;
15847 var = OMP_CLAUSE_DECL (c);
15848 if (!is_reference (var)
15849 && !is_gimple_reg_type (TREE_TYPE (var)))
15850 {
15851 tree new_var = lookup_decl (var, ctx);
15852 if (is_variable_sized (var))
15853 {
15854 tree pvar = DECL_VALUE_EXPR (var);
15855 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
15856 pvar = TREE_OPERAND (pvar, 0);
15857 gcc_assert (DECL_P (pvar));
15858 tree new_pvar = lookup_decl (pvar, ctx);
15859 x = build_fold_indirect_ref (new_pvar);
15860 TREE_THIS_NOTRAP (x) = 1;
15861 }
15862 else
15863 x = build_receiver_ref (var, true, ctx);
15864 SET_DECL_VALUE_EXPR (new_var, x);
15865 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15866 }
15867 break;
15868
15869 case OMP_CLAUSE_PRIVATE:
15870 if (is_gimple_omp_oacc (ctx->stmt))
15871 break;
15872 var = OMP_CLAUSE_DECL (c);
15873 if (is_variable_sized (var))
15874 {
15875 tree new_var = lookup_decl (var, ctx);
15876 tree pvar = DECL_VALUE_EXPR (var);
15877 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
15878 pvar = TREE_OPERAND (pvar, 0);
15879 gcc_assert (DECL_P (pvar));
15880 tree new_pvar = lookup_decl (pvar, ctx);
15881 x = build_fold_indirect_ref (new_pvar);
15882 TREE_THIS_NOTRAP (x) = 1;
15883 SET_DECL_VALUE_EXPR (new_var, x);
15884 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15885 }
15886 break;
15887
15888 case OMP_CLAUSE_USE_DEVICE_PTR:
15889 case OMP_CLAUSE_IS_DEVICE_PTR:
15890 var = OMP_CLAUSE_DECL (c);
15891 map_cnt++;
15892 if (is_variable_sized (var))
15893 {
15894 tree new_var = lookup_decl (var, ctx);
15895 tree pvar = DECL_VALUE_EXPR (var);
15896 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
15897 pvar = TREE_OPERAND (pvar, 0);
15898 gcc_assert (DECL_P (pvar));
15899 tree new_pvar = lookup_decl (pvar, ctx);
15900 x = build_fold_indirect_ref (new_pvar);
15901 TREE_THIS_NOTRAP (x) = 1;
15902 SET_DECL_VALUE_EXPR (new_var, x);
15903 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15904 }
15905 else if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
15906 {
15907 tree new_var = lookup_decl (var, ctx);
15908 tree type = build_pointer_type (TREE_TYPE (var));
15909 x = create_tmp_var_raw (type, get_name (new_var));
15910 gimple_add_tmp_var (x);
15911 x = build_simple_mem_ref (x);
15912 SET_DECL_VALUE_EXPR (new_var, x);
15913 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15914 }
15915 else
15916 {
15917 tree new_var = lookup_decl (var, ctx);
15918 x = create_tmp_var_raw (TREE_TYPE (new_var), get_name (new_var));
15919 gimple_add_tmp_var (x);
15920 SET_DECL_VALUE_EXPR (new_var, x);
15921 DECL_HAS_VALUE_EXPR_P (new_var) = 1;
15922 }
15923 break;
15924 }
15925
15926 if (offloaded)
15927 {
15928 target_nesting_level++;
15929 lower_omp (&tgt_body, ctx);
15930 target_nesting_level--;
15931 }
15932 else if (data_region)
15933 lower_omp (&tgt_body, ctx);
15934
15935 if (offloaded)
15936 {
15937 /* Declare all the variables created by mapping and the variables
15938 declared in the scope of the target body. */
15939 record_vars_into (ctx->block_vars, child_fn);
15940 record_vars_into (gimple_bind_vars (tgt_bind), child_fn);
15941 }
15942
15943 olist = NULL;
15944 ilist = NULL;
15945 if (ctx->record_type)
15946 {
15947 ctx->sender_decl
15948 = create_tmp_var (ctx->record_type, ".omp_data_arr");
15949 DECL_NAMELESS (ctx->sender_decl) = 1;
15950 TREE_ADDRESSABLE (ctx->sender_decl) = 1;
15951 t = make_tree_vec (3);
15952 TREE_VEC_ELT (t, 0) = ctx->sender_decl;
15953 TREE_VEC_ELT (t, 1)
15954 = create_tmp_var (build_array_type_nelts (size_type_node, map_cnt),
15955 ".omp_data_sizes");
15956 DECL_NAMELESS (TREE_VEC_ELT (t, 1)) = 1;
15957 TREE_ADDRESSABLE (TREE_VEC_ELT (t, 1)) = 1;
15958 TREE_STATIC (TREE_VEC_ELT (t, 1)) = 1;
15959 tree tkind_type = short_unsigned_type_node;
15960 int talign_shift = 8;
15961 TREE_VEC_ELT (t, 2)
15962 = create_tmp_var (build_array_type_nelts (tkind_type, map_cnt),
15963 ".omp_data_kinds");
15964 DECL_NAMELESS (TREE_VEC_ELT (t, 2)) = 1;
15965 TREE_ADDRESSABLE (TREE_VEC_ELT (t, 2)) = 1;
15966 TREE_STATIC (TREE_VEC_ELT (t, 2)) = 1;
15967 gimple_omp_target_set_data_arg (stmt, t);
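/* The data argument is a TREE_VEC of the three arrays handed to the
   runtime: the object addresses (".omp_data_arr"), their sizes
   (".omp_data_sizes") and their map kinds (".omp_data_kinds").  Each kind
   entry built below encodes the GOMP_MAP_* value in its low bits and
   ceil_log2 of the alignment shifted left by talign_shift (8) bits.  */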
15968
15969 vec<constructor_elt, va_gc> *vsize;
15970 vec<constructor_elt, va_gc> *vkind;
15971 vec_alloc (vsize, map_cnt);
15972 vec_alloc (vkind, map_cnt);
15973 unsigned int map_idx = 0;
15974
15975 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
15976 switch (OMP_CLAUSE_CODE (c))
15977 {
15978 tree ovar, nc, s, purpose, var, x, type;
15979 unsigned int talign;
15980
15981 default:
15982 break;
15983
15984 case OMP_CLAUSE_MAP:
15985 case OMP_CLAUSE_TO:
15986 case OMP_CLAUSE_FROM:
15987 oacc_firstprivate_map:
15988 nc = c;
15989 ovar = OMP_CLAUSE_DECL (c);
15990 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
15991 && (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
15992 || (OMP_CLAUSE_MAP_KIND (c)
15993 == GOMP_MAP_FIRSTPRIVATE_REFERENCE)))
15994 break;
15995 if (!DECL_P (ovar))
15996 {
15997 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
15998 && OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c))
15999 {
16000 gcc_checking_assert (OMP_CLAUSE_DECL (OMP_CLAUSE_CHAIN (c))
16001 == get_base_address (ovar));
16002 nc = OMP_CLAUSE_CHAIN (c);
16003 ovar = OMP_CLAUSE_DECL (nc);
16004 }
16005 else
16006 {
16007 tree x = build_sender_ref (ovar, ctx);
16008 tree v
16009 = build_fold_addr_expr_with_type (ovar, ptr_type_node);
16010 gimplify_assign (x, v, &ilist);
16011 nc = NULL_TREE;
16012 }
16013 }
16014 else
16015 {
16016 if (DECL_SIZE (ovar)
16017 && TREE_CODE (DECL_SIZE (ovar)) != INTEGER_CST)
16018 {
16019 tree ovar2 = DECL_VALUE_EXPR (ovar);
16020 gcc_assert (TREE_CODE (ovar2) == INDIRECT_REF);
16021 ovar2 = TREE_OPERAND (ovar2, 0);
16022 gcc_assert (DECL_P (ovar2));
16023 ovar = ovar2;
16024 }
16025 if (!maybe_lookup_field (ovar, ctx))
16026 continue;
16027 }
16028
16029 talign = TYPE_ALIGN_UNIT (TREE_TYPE (ovar));
16030 if (DECL_P (ovar) && DECL_ALIGN_UNIT (ovar) > talign)
16031 talign = DECL_ALIGN_UNIT (ovar);
16032 if (nc)
16033 {
16034 var = lookup_decl_in_outer_ctx (ovar, ctx);
16035 x = build_sender_ref (ovar, ctx);
16036
16037 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP
16038 && OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_POINTER
16039 && !OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION (c)
16040 && TREE_CODE (TREE_TYPE (ovar)) == ARRAY_TYPE)
16041 {
16042 gcc_assert (offloaded);
16043 tree avar
16044 = create_tmp_var (TREE_TYPE (TREE_TYPE (x)));
16045 mark_addressable (avar);
16046 gimplify_assign (avar, build_fold_addr_expr (var), &ilist);
16047 talign = DECL_ALIGN_UNIT (avar);
16048 avar = build_fold_addr_expr (avar);
16049 gimplify_assign (x, avar, &ilist);
16050 }
16051 else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
16052 {
16053 gcc_assert (is_gimple_omp_oacc (ctx->stmt));
16054 if (!is_reference (var))
16055 var = build_fold_addr_expr (var);
16056 else
16057 talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
16058 gimplify_assign (x, var, &ilist);
16059 }
16060 else if (is_gimple_reg (var))
16061 {
16062 gcc_assert (offloaded);
16063 tree avar = create_tmp_var (TREE_TYPE (var));
16064 mark_addressable (avar);
16065 enum gomp_map_kind map_kind = OMP_CLAUSE_MAP_KIND (c);
16066 if (GOMP_MAP_COPY_TO_P (map_kind)
16067 || map_kind == GOMP_MAP_POINTER
16068 || map_kind == GOMP_MAP_TO_PSET
16069 || map_kind == GOMP_MAP_FORCE_DEVICEPTR)
16070 gimplify_assign (avar, var, &ilist);
16071 avar = build_fold_addr_expr (avar);
16072 gimplify_assign (x, avar, &ilist);
16073 if ((GOMP_MAP_COPY_FROM_P (map_kind)
16074 || map_kind == GOMP_MAP_FORCE_DEVICEPTR)
16075 && !TYPE_READONLY (TREE_TYPE (var)))
16076 {
16077 x = unshare_expr (x);
16078 x = build_simple_mem_ref (x);
16079 gimplify_assign (var, x, &olist);
16080 }
16081 }
16082 else
16083 {
16084 var = build_fold_addr_expr (var);
16085 gimplify_assign (x, var, &ilist);
16086 }
16087 }
16088 s = NULL_TREE;
16089 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE)
16090 {
16091 gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt));
16092 s = TREE_TYPE (ovar);
16093 if (TREE_CODE (s) == REFERENCE_TYPE)
16094 s = TREE_TYPE (s);
16095 s = TYPE_SIZE_UNIT (s);
16096 }
16097 else
16098 s = OMP_CLAUSE_SIZE (c);
16099 if (s == NULL_TREE)
16100 s = TYPE_SIZE_UNIT (TREE_TYPE (ovar));
16101 s = fold_convert (size_type_node, s);
16102 purpose = size_int (map_idx++);
16103 CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
16104 if (TREE_CODE (s) != INTEGER_CST)
16105 TREE_STATIC (TREE_VEC_ELT (t, 1)) = 0;
16106
16107 unsigned HOST_WIDE_INT tkind, tkind_zero;
16108 switch (OMP_CLAUSE_CODE (c))
16109 {
16110 case OMP_CLAUSE_MAP:
16111 tkind = OMP_CLAUSE_MAP_KIND (c);
16112 tkind_zero = tkind;
16113 if (OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION (c))
16114 switch (tkind)
16115 {
16116 case GOMP_MAP_ALLOC:
16117 case GOMP_MAP_TO:
16118 case GOMP_MAP_FROM:
16119 case GOMP_MAP_TOFROM:
16120 case GOMP_MAP_ALWAYS_TO:
16121 case GOMP_MAP_ALWAYS_FROM:
16122 case GOMP_MAP_ALWAYS_TOFROM:
16123 case GOMP_MAP_RELEASE:
16124 tkind_zero = GOMP_MAP_ZERO_LEN_ARRAY_SECTION;
16125 break;
16126 case GOMP_MAP_DELETE:
16127 tkind_zero = GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION;
16128 default:
16129 break;
16130 }
16131 if (tkind_zero != tkind)
16132 {
16133 if (integer_zerop (s))
16134 tkind = tkind_zero;
16135 else if (integer_nonzerop (s))
16136 tkind_zero = tkind;
16137 }
16138 break;
16139 case OMP_CLAUSE_FIRSTPRIVATE:
16140 gcc_checking_assert (is_gimple_omp_oacc (ctx->stmt));
16141 tkind = GOMP_MAP_TO;
16142 tkind_zero = tkind;
16143 break;
16144 case OMP_CLAUSE_TO:
16145 tkind = GOMP_MAP_TO;
16146 tkind_zero = tkind;
16147 break;
16148 case OMP_CLAUSE_FROM:
16149 tkind = GOMP_MAP_FROM;
16150 tkind_zero = tkind;
16151 break;
16152 default:
16153 gcc_unreachable ();
16154 }
16155 gcc_checking_assert (tkind
16156 < (HOST_WIDE_INT_C (1U) << talign_shift));
16157 gcc_checking_assert (tkind_zero
16158 < (HOST_WIDE_INT_C (1U) << talign_shift));
16159 talign = ceil_log2 (talign);
16160 tkind |= talign << talign_shift;
16161 tkind_zero |= talign << talign_shift;
16162 gcc_checking_assert (tkind
16163 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16164 gcc_checking_assert (tkind_zero
16165 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16166 if (tkind == tkind_zero)
16167 x = build_int_cstu (tkind_type, tkind);
16168 else
16169 {
16170 TREE_STATIC (TREE_VEC_ELT (t, 2)) = 0;
16171 x = build3 (COND_EXPR, tkind_type,
16172 fold_build2 (EQ_EXPR, boolean_type_node,
16173 unshare_expr (s), size_zero_node),
16174 build_int_cstu (tkind_type, tkind_zero),
16175 build_int_cstu (tkind_type, tkind));
16176 }
16177 CONSTRUCTOR_APPEND_ELT (vkind, purpose, x);
16178 if (nc && nc != c)
16179 c = nc;
16180 break;
16181
16182 case OMP_CLAUSE_FIRSTPRIVATE:
16183 if (is_oacc_parallel (ctx))
16184 goto oacc_firstprivate_map;
16185 ovar = OMP_CLAUSE_DECL (c);
16186 if (is_reference (ovar))
16187 talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
16188 else
16189 talign = DECL_ALIGN_UNIT (ovar);
16190 var = lookup_decl_in_outer_ctx (ovar, ctx);
16191 x = build_sender_ref (ovar, ctx);
16192 tkind = GOMP_MAP_FIRSTPRIVATE;
16193 type = TREE_TYPE (ovar);
16194 if (is_reference (ovar))
16195 type = TREE_TYPE (type);
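/* Small scalars need not be mapped at all: when the host value fits into a
   pointer-sized integer, GOMP_MAP_FIRSTPRIVATE_INT is chosen below and the
   value itself (rather than its address) is stored in the sender slot,
   with a size of zero.  */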
16196 bool use_firstprivate_int, force_addr;
16197 use_firstprivate_int = false;
16198 force_addr = false;
16199 if ((INTEGRAL_TYPE_P (type)
16200 && TYPE_PRECISION (type) <= POINTER_SIZE)
16201 || TREE_CODE (type) == POINTER_TYPE)
16202 use_firstprivate_int = true;
16203 if (has_depend)
16204 {
16205 if (is_reference (var))
16206 use_firstprivate_int = false;
16207 else if (is_gimple_reg (var))
16208 {
16209 if (DECL_HAS_VALUE_EXPR_P (var))
16210 {
16211 tree v = get_base_address (var);
16212 if (DECL_P (v) && TREE_ADDRESSABLE (v))
16213 {
16214 use_firstprivate_int = false;
16215 force_addr = true;
16216 }
16217 else
16218 switch (TREE_CODE (v))
16219 {
16220 case INDIRECT_REF:
16221 case MEM_REF:
16222 use_firstprivate_int = false;
16223 force_addr = true;
16224 break;
16225 default:
16226 break;
16227 }
16228 }
16229 }
16230 else
16231 use_firstprivate_int = false;
16232 }
16233 if (use_firstprivate_int)
16234 {
16235 tkind = GOMP_MAP_FIRSTPRIVATE_INT;
16236 tree t = var;
16237 if (is_reference (var))
16238 t = build_simple_mem_ref (var);
16239 if (TREE_CODE (type) != POINTER_TYPE)
16240 t = fold_convert (pointer_sized_int_node, t);
16241 t = fold_convert (TREE_TYPE (x), t);
16242 gimplify_assign (x, t, &ilist);
16243 }
16244 else if (is_reference (var))
16245 gimplify_assign (x, var, &ilist);
16246 else if (!force_addr && is_gimple_reg (var))
16247 {
16248 tree avar = create_tmp_var (TREE_TYPE (var));
16249 mark_addressable (avar);
16250 gimplify_assign (avar, var, &ilist);
16251 avar = build_fold_addr_expr (avar);
16252 gimplify_assign (x, avar, &ilist);
16253 }
16254 else
16255 {
16256 var = build_fold_addr_expr (var);
16257 gimplify_assign (x, var, &ilist);
16258 }
16259 if (tkind == GOMP_MAP_FIRSTPRIVATE_INT)
16260 s = size_int (0);
16261 else if (is_reference (var))
16262 s = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
16263 else
16264 s = TYPE_SIZE_UNIT (TREE_TYPE (ovar));
16265 s = fold_convert (size_type_node, s);
16266 purpose = size_int (map_idx++);
16267 CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
16268 if (TREE_CODE (s) != INTEGER_CST)
16269 TREE_STATIC (TREE_VEC_ELT (t, 1)) = 0;
16270
16271 gcc_checking_assert (tkind
16272 < (HOST_WIDE_INT_C (1U) << talign_shift));
16273 talign = ceil_log2 (talign);
16274 tkind |= talign << talign_shift;
16275 gcc_checking_assert (tkind
16276 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16277 CONSTRUCTOR_APPEND_ELT (vkind, purpose,
16278 build_int_cstu (tkind_type, tkind));
16279 break;
16280
16281 case OMP_CLAUSE_USE_DEVICE_PTR:
16282 case OMP_CLAUSE_IS_DEVICE_PTR:
16283 ovar = OMP_CLAUSE_DECL (c);
16284 var = lookup_decl_in_outer_ctx (ovar, ctx);
16285 x = build_sender_ref (ovar, ctx);
16286 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
16287 tkind = GOMP_MAP_USE_DEVICE_PTR;
16288 else
16289 tkind = GOMP_MAP_FIRSTPRIVATE_INT;
16290 type = TREE_TYPE (ovar);
16291 if (TREE_CODE (type) == ARRAY_TYPE)
16292 var = build_fold_addr_expr (var);
16293 else
16294 {
16295 if (is_reference (ovar))
16296 {
16297 type = TREE_TYPE (type);
16298 if (TREE_CODE (type) != ARRAY_TYPE)
16299 var = build_simple_mem_ref (var);
16300 var = fold_convert (TREE_TYPE (x), var);
16301 }
16302 }
16303 gimplify_assign (x, var, &ilist);
16304 s = size_int (0);
16305 purpose = size_int (map_idx++);
16306 CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
16307 gcc_checking_assert (tkind
16308 < (HOST_WIDE_INT_C (1U) << talign_shift));
16309 gcc_checking_assert (tkind
16310 <= tree_to_uhwi (TYPE_MAX_VALUE (tkind_type)));
16311 CONSTRUCTOR_APPEND_ELT (vkind, purpose,
16312 build_int_cstu (tkind_type, tkind));
16313 break;
16314 }
16315
16316 gcc_assert (map_idx == map_cnt);
16317
16318 DECL_INITIAL (TREE_VEC_ELT (t, 1))
16319 = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 1)), vsize);
16320 DECL_INITIAL (TREE_VEC_ELT (t, 2))
16321 = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 2)), vkind);
16322 for (int i = 1; i <= 2; i++)
16323 if (!TREE_STATIC (TREE_VEC_ELT (t, i)))
16324 {
16325 gimple_seq initlist = NULL;
16326 force_gimple_operand (build1 (DECL_EXPR, void_type_node,
16327 TREE_VEC_ELT (t, i)),
16328 &initlist, true, NULL_TREE);
16329 gimple_seq_add_seq (&ilist, initlist);
16330
16331 tree clobber = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, i)),
16332 NULL);
16333 TREE_THIS_VOLATILE (clobber) = 1;
16334 gimple_seq_add_stmt (&olist,
16335 gimple_build_assign (TREE_VEC_ELT (t, i),
16336 clobber));
16337 }
16338
16339 tree clobber = build_constructor (ctx->record_type, NULL);
16340 TREE_THIS_VOLATILE (clobber) = 1;
16341 gimple_seq_add_stmt (&olist, gimple_build_assign (ctx->sender_decl,
16342 clobber));
16343 }
16344
16345 /* Once all the expansions are done, sequence all the different
16346 fragments inside gimple_omp_body. */
16347
16348 new_body = NULL;
16349
16350 if (offloaded
16351 && ctx->record_type)
16352 {
16353 t = build_fold_addr_expr_loc (loc, ctx->sender_decl);
16354 /* fixup_child_record_type might have changed receiver_decl's type. */
16355 t = fold_convert_loc (loc, TREE_TYPE (ctx->receiver_decl), t);
16356 gimple_seq_add_stmt (&new_body,
16357 gimple_build_assign (ctx->receiver_decl, t));
16358 }
16359 gimple_seq_add_seq (&new_body, fplist);
16360
16361 if (offloaded || data_region)
16362 {
16363 tree prev = NULL_TREE;
16364 for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c))
16365 switch (OMP_CLAUSE_CODE (c))
16366 {
16367 tree var, x;
16368 default:
16369 break;
16370 case OMP_CLAUSE_FIRSTPRIVATE:
16371 if (is_gimple_omp_oacc (ctx->stmt))
16372 break;
16373 var = OMP_CLAUSE_DECL (c);
16374 if (is_reference (var)
16375 || is_gimple_reg_type (TREE_TYPE (var)))
16376 {
16377 tree new_var = lookup_decl (var, ctx);
16378 tree type;
16379 type = TREE_TYPE (var);
16380 if (is_reference (var))
16381 type = TREE_TYPE (type);
16382 bool use_firstprivate_int;
16383 use_firstprivate_int = false;
16384 if ((INTEGRAL_TYPE_P (type)
16385 && TYPE_PRECISION (type) <= POINTER_SIZE)
16386 || TREE_CODE (type) == POINTER_TYPE)
16387 use_firstprivate_int = true;
16388 if (has_depend)
16389 {
16390 tree v = lookup_decl_in_outer_ctx (var, ctx);
16391 if (is_reference (v))
16392 use_firstprivate_int = false;
16393 else if (is_gimple_reg (v))
16394 {
16395 if (DECL_HAS_VALUE_EXPR_P (v))
16396 {
16397 v = get_base_address (v);
16398 if (DECL_P (v) && TREE_ADDRESSABLE (v))
16399 use_firstprivate_int = false;
16400 else
16401 switch (TREE_CODE (v))
16402 {
16403 case INDIRECT_REF:
16404 case MEM_REF:
16405 use_firstprivate_int = false;
16406 break;
16407 default:
16408 break;
16409 }
16410 }
16411 }
16412 else
16413 use_firstprivate_int = false;
16414 }
16415 if (use_firstprivate_int)
16416 {
16417 x = build_receiver_ref (var, false, ctx);
16418 if (TREE_CODE (type) != POINTER_TYPE)
16419 x = fold_convert (pointer_sized_int_node, x);
16420 x = fold_convert (type, x);
16421 gimplify_expr (&x, &new_body, NULL, is_gimple_val,
16422 fb_rvalue);
16423 if (is_reference (var))
16424 {
16425 tree v = create_tmp_var_raw (type, get_name (var));
16426 gimple_add_tmp_var (v);
16427 TREE_ADDRESSABLE (v) = 1;
16428 gimple_seq_add_stmt (&new_body,
16429 gimple_build_assign (v, x));
16430 x = build_fold_addr_expr (v);
16431 }
16432 gimple_seq_add_stmt (&new_body,
16433 gimple_build_assign (new_var, x));
16434 }
16435 else
16436 {
16437 x = build_receiver_ref (var, !is_reference (var), ctx);
16438 gimplify_expr (&x, &new_body, NULL, is_gimple_val,
16439 fb_rvalue);
16440 gimple_seq_add_stmt (&new_body,
16441 gimple_build_assign (new_var, x));
16442 }
16443 }
16444 else if (is_variable_sized (var))
16445 {
16446 tree pvar = DECL_VALUE_EXPR (var);
16447 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16448 pvar = TREE_OPERAND (pvar, 0);
16449 gcc_assert (DECL_P (pvar));
16450 tree new_var = lookup_decl (pvar, ctx);
16451 x = build_receiver_ref (var, false, ctx);
16452 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16453 gimple_seq_add_stmt (&new_body,
16454 gimple_build_assign (new_var, x));
16455 }
16456 break;
16457 case OMP_CLAUSE_PRIVATE:
16458 if (is_gimple_omp_oacc (ctx->stmt))
16459 break;
16460 var = OMP_CLAUSE_DECL (c);
16461 if (is_reference (var))
16462 {
16463 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
16464 tree new_var = lookup_decl (var, ctx);
16465 x = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_var)));
16466 if (TREE_CONSTANT (x))
16467 {
16468 x = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (new_var)),
16469 get_name (var));
16470 gimple_add_tmp_var (x);
16471 TREE_ADDRESSABLE (x) = 1;
16472 x = build_fold_addr_expr_loc (clause_loc, x);
16473 }
16474 else
16475 {
16476 tree atmp
16477 = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
16478 tree rtype = TREE_TYPE (TREE_TYPE (new_var));
16479 tree al = size_int (TYPE_ALIGN (rtype));
16480 x = build_call_expr_loc (clause_loc, atmp, 2, x, al);
16481 }
16482
16483 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
16484 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16485 gimple_seq_add_stmt (&new_body,
16486 gimple_build_assign (new_var, x));
16487 }
16488 break;
16489 case OMP_CLAUSE_USE_DEVICE_PTR:
16490 case OMP_CLAUSE_IS_DEVICE_PTR:
16491 var = OMP_CLAUSE_DECL (c);
16492 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_USE_DEVICE_PTR)
16493 x = build_sender_ref (var, ctx);
16494 else
16495 x = build_receiver_ref (var, false, ctx);
16496 if (is_variable_sized (var))
16497 {
16498 tree pvar = DECL_VALUE_EXPR (var);
16499 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16500 pvar = TREE_OPERAND (pvar, 0);
16501 gcc_assert (DECL_P (pvar));
16502 tree new_var = lookup_decl (pvar, ctx);
16503 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16504 gimple_seq_add_stmt (&new_body,
16505 gimple_build_assign (new_var, x));
16506 }
16507 else if (TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE)
16508 {
16509 tree new_var = lookup_decl (var, ctx);
16510 new_var = DECL_VALUE_EXPR (new_var);
16511 gcc_assert (TREE_CODE (new_var) == MEM_REF);
16512 new_var = TREE_OPERAND (new_var, 0);
16513 gcc_assert (DECL_P (new_var));
16514 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16515 gimple_seq_add_stmt (&new_body,
16516 gimple_build_assign (new_var, x));
16517 }
16518 else
16519 {
16520 tree type = TREE_TYPE (var);
16521 tree new_var = lookup_decl (var, ctx);
16522 if (is_reference (var))
16523 {
16524 type = TREE_TYPE (type);
16525 if (TREE_CODE (type) != ARRAY_TYPE)
16526 {
16527 tree v = create_tmp_var_raw (type, get_name (var));
16528 gimple_add_tmp_var (v);
16529 TREE_ADDRESSABLE (v) = 1;
16530 x = fold_convert (type, x);
16531 gimplify_expr (&x, &new_body, NULL, is_gimple_val,
16532 fb_rvalue);
16533 gimple_seq_add_stmt (&new_body,
16534 gimple_build_assign (v, x));
16535 x = build_fold_addr_expr (v);
16536 }
16537 }
16538 new_var = DECL_VALUE_EXPR (new_var);
16539 x = fold_convert (TREE_TYPE (new_var), x);
16540 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16541 gimple_seq_add_stmt (&new_body,
16542 gimple_build_assign (new_var, x));
16543 }
16544 break;
16545 }
16546 /* Handle GOMP_MAP_FIRSTPRIVATE_{POINTER,REFERENCE} in a second pass,
16547 so that any firstprivate vars that hold the OMP_CLAUSE_SIZE values
16548 (when those are needed) have already been handled above. */
16549 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
16550 switch (OMP_CLAUSE_CODE (c))
16551 {
16552 tree var;
16553 default:
16554 break;
16555 case OMP_CLAUSE_MAP:
16556 if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_POINTER
16557 || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)
16558 {
16559 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
16560 HOST_WIDE_INT offset = 0;
16561 gcc_assert (prev);
16562 var = OMP_CLAUSE_DECL (c);
16563 if (DECL_P (var)
16564 && TREE_CODE (TREE_TYPE (var)) == ARRAY_TYPE
16565 && is_global_var (maybe_lookup_decl_in_outer_ctx (var,
16566 ctx))
16567 && varpool_node::get_create (var)->offloadable)
16568 break;
16569 if (TREE_CODE (var) == INDIRECT_REF
16570 && TREE_CODE (TREE_OPERAND (var, 0)) == COMPONENT_REF)
16571 var = TREE_OPERAND (var, 0);
16572 if (TREE_CODE (var) == COMPONENT_REF)
16573 {
16574 var = get_addr_base_and_unit_offset (var, &offset);
16575 gcc_assert (var != NULL_TREE && DECL_P (var));
16576 }
16577 else if (DECL_SIZE (var)
16578 && TREE_CODE (DECL_SIZE (var)) != INTEGER_CST)
16579 {
16580 tree var2 = DECL_VALUE_EXPR (var);
16581 gcc_assert (TREE_CODE (var2) == INDIRECT_REF);
16582 var2 = TREE_OPERAND (var2, 0);
16583 gcc_assert (DECL_P (var2));
16584 var = var2;
16585 }
16586 tree new_var = lookup_decl (var, ctx), x;
16587 tree type = TREE_TYPE (new_var);
16588 bool is_ref;
16589 if (TREE_CODE (OMP_CLAUSE_DECL (c)) == INDIRECT_REF
16590 && (TREE_CODE (TREE_OPERAND (OMP_CLAUSE_DECL (c), 0))
16591 == COMPONENT_REF))
16592 {
16593 type = TREE_TYPE (TREE_OPERAND (OMP_CLAUSE_DECL (c), 0));
16594 is_ref = true;
16595 new_var = build2 (MEM_REF, type,
16596 build_fold_addr_expr (new_var),
16597 build_int_cst (build_pointer_type (type),
16598 offset));
16599 }
16600 else if (TREE_CODE (OMP_CLAUSE_DECL (c)) == COMPONENT_REF)
16601 {
16602 type = TREE_TYPE (OMP_CLAUSE_DECL (c));
16603 is_ref = TREE_CODE (type) == REFERENCE_TYPE;
16604 new_var = build2 (MEM_REF, type,
16605 build_fold_addr_expr (new_var),
16606 build_int_cst (build_pointer_type (type),
16607 offset));
16608 }
16609 else
16610 is_ref = is_reference (var);
16611 if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE)
16612 is_ref = false;
16613 bool ref_to_array = false;
16614 if (is_ref)
16615 {
16616 type = TREE_TYPE (type);
16617 if (TREE_CODE (type) == ARRAY_TYPE)
16618 {
16619 type = build_pointer_type (type);
16620 ref_to_array = true;
16621 }
16622 }
16623 else if (TREE_CODE (type) == ARRAY_TYPE)
16624 {
16625 tree decl2 = DECL_VALUE_EXPR (new_var);
16626 gcc_assert (TREE_CODE (decl2) == MEM_REF);
16627 decl2 = TREE_OPERAND (decl2, 0);
16628 gcc_assert (DECL_P (decl2));
16629 new_var = decl2;
16630 type = TREE_TYPE (new_var);
16631 }
16632 x = build_receiver_ref (OMP_CLAUSE_DECL (prev), false, ctx);
16633 x = fold_convert_loc (clause_loc, type, x);
16634 if (!integer_zerop (OMP_CLAUSE_SIZE (c)))
16635 {
16636 tree bias = OMP_CLAUSE_SIZE (c);
16637 if (DECL_P (bias))
16638 bias = lookup_decl (bias, ctx);
16639 bias = fold_convert_loc (clause_loc, sizetype, bias);
16640 bias = fold_build1_loc (clause_loc, NEGATE_EXPR, sizetype,
16641 bias);
16642 x = fold_build2_loc (clause_loc, POINTER_PLUS_EXPR,
16643 TREE_TYPE (x), x, bias);
16644 }
16645 if (ref_to_array)
16646 x = fold_convert_loc (clause_loc, TREE_TYPE (new_var), x);
16647 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16648 if (is_ref && !ref_to_array)
16649 {
16650 tree t = create_tmp_var_raw (type, get_name (var));
16651 gimple_add_tmp_var (t);
16652 TREE_ADDRESSABLE (t) = 1;
16653 gimple_seq_add_stmt (&new_body,
16654 gimple_build_assign (t, x));
16655 x = build_fold_addr_expr_loc (clause_loc, t);
16656 }
16657 gimple_seq_add_stmt (&new_body,
16658 gimple_build_assign (new_var, x));
16659 prev = NULL_TREE;
16660 }
16661 else if (OMP_CLAUSE_CHAIN (c)
16662 && OMP_CLAUSE_CODE (OMP_CLAUSE_CHAIN (c))
16663 == OMP_CLAUSE_MAP
16664 && (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c))
16665 == GOMP_MAP_FIRSTPRIVATE_POINTER
16666 || (OMP_CLAUSE_MAP_KIND (OMP_CLAUSE_CHAIN (c))
16667 == GOMP_MAP_FIRSTPRIVATE_REFERENCE)))
16668 prev = c;
16669 break;
16670 case OMP_CLAUSE_PRIVATE:
16671 var = OMP_CLAUSE_DECL (c);
16672 if (is_variable_sized (var))
16673 {
16674 location_t clause_loc = OMP_CLAUSE_LOCATION (c);
16675 tree new_var = lookup_decl (var, ctx);
16676 tree pvar = DECL_VALUE_EXPR (var);
16677 gcc_assert (TREE_CODE (pvar) == INDIRECT_REF);
16678 pvar = TREE_OPERAND (pvar, 0);
16679 gcc_assert (DECL_P (pvar));
16680 tree new_pvar = lookup_decl (pvar, ctx);
16681 tree atmp = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
16682 tree al = size_int (DECL_ALIGN (var));
16683 tree x = TYPE_SIZE_UNIT (TREE_TYPE (new_var));
16684 x = build_call_expr_loc (clause_loc, atmp, 2, x, al);
16685 x = fold_convert_loc (clause_loc, TREE_TYPE (new_pvar), x);
16686 gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue);
16687 gimple_seq_add_stmt (&new_body,
16688 gimple_build_assign (new_pvar, x));
16689 }
16690 break;
16691 }
16692
16693 gimple_seq fork_seq = NULL;
16694 gimple_seq join_seq = NULL;
16695
16696 if (is_oacc_parallel (ctx))
16697 {
16698 /* If there are reductions on the offloaded region itself, treat
16699 them as a dummy GANG loop. */
16700 tree level = build_int_cst (integer_type_node, GOMP_DIM_GANG);
16701
16702 lower_oacc_reductions (gimple_location (ctx->stmt), clauses, level,
16703 false, NULL, NULL, &fork_seq, &join_seq, ctx);
16704 }
16705
16706 gimple_seq_add_seq (&new_body, fork_seq);
16707 gimple_seq_add_seq (&new_body, tgt_body);
16708 gimple_seq_add_seq (&new_body, join_seq);
16709
16710 if (offloaded)
16711 new_body = maybe_catch_exception (new_body);
16712
16713 gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false));
16714 gimple_omp_set_body (stmt, new_body);
16715 }
16716
16717 bind = gimple_build_bind (NULL, NULL,
16718 tgt_bind ? gimple_bind_block (tgt_bind)
16719 : NULL_TREE);
16720 gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true);
16721 gimple_bind_add_seq (bind, ilist);
16722 gimple_bind_add_stmt (bind, stmt);
16723 gimple_bind_add_seq (bind, olist);
16724
16725 pop_gimplify_context (NULL);
16726
16727 if (dep_bind)
16728 {
16729 gimple_bind_add_seq (dep_bind, dep_ilist);
16730 gimple_bind_add_stmt (dep_bind, bind);
16731 gimple_bind_add_seq (dep_bind, dep_olist);
16732 pop_gimplify_context (dep_bind);
16733 }
16734 }
16735
16736 /* Lower code for an OpenMP teams directive. */
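/* The num_teams and thread_limit clauses are evaluated into two values
   (zero standing in for an absent clause) and, unless the construct is a
   phony grid teams, a call to the GOMP_teams builtin with those two
   arguments is emitted in front of the lowered body.  */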
16737
16738 static void
16739 lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
16740 {
16741 gomp_teams *teams_stmt = as_a <gomp_teams *> (gsi_stmt (*gsi_p));
16742 push_gimplify_context ();
16743
16744 tree block = make_node (BLOCK);
16745 gbind *bind = gimple_build_bind (NULL, NULL, block);
16746 gsi_replace (gsi_p, bind, true);
16747 gimple_seq bind_body = NULL;
16748 gimple_seq dlist = NULL;
16749 gimple_seq olist = NULL;
16750
16751 tree num_teams = find_omp_clause (gimple_omp_teams_clauses (teams_stmt),
16752 OMP_CLAUSE_NUM_TEAMS);
16753 if (num_teams == NULL_TREE)
16754 num_teams = build_int_cst (unsigned_type_node, 0);
16755 else
16756 {
16757 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
16758 num_teams = fold_convert (unsigned_type_node, num_teams);
16759 gimplify_expr (&num_teams, &bind_body, NULL, is_gimple_val, fb_rvalue);
16760 }
16761 tree thread_limit = find_omp_clause (gimple_omp_teams_clauses (teams_stmt),
16762 OMP_CLAUSE_THREAD_LIMIT);
16763 if (thread_limit == NULL_TREE)
16764 thread_limit = build_int_cst (unsigned_type_node, 0);
16765 else
16766 {
16767 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
16768 thread_limit = fold_convert (unsigned_type_node, thread_limit);
16769 gimplify_expr (&thread_limit, &bind_body, NULL, is_gimple_val,
16770 fb_rvalue);
16771 }
16772
16773 lower_rec_input_clauses (gimple_omp_teams_clauses (teams_stmt),
16774 &bind_body, &dlist, ctx, NULL);
16775 lower_omp (gimple_omp_body_ptr (teams_stmt), ctx);
16776 lower_reduction_clauses (gimple_omp_teams_clauses (teams_stmt), &olist, ctx);
16777 if (!gimple_omp_teams_grid_phony (teams_stmt))
16778 {
16779 gimple_seq_add_stmt (&bind_body, teams_stmt);
16780 location_t loc = gimple_location (teams_stmt);
16781 tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS);
16782 gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit);
16783 gimple_set_location (call, loc);
16784 gimple_seq_add_stmt (&bind_body, call);
16785 }
16786
16787 gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt));
16788 gimple_omp_set_body (teams_stmt, NULL);
16789 gimple_seq_add_seq (&bind_body, olist);
16790 gimple_seq_add_seq (&bind_body, dlist);
16791 if (!gimple_omp_teams_grid_phony (teams_stmt))
16792 gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true));
16793 gimple_bind_set_body (bind, bind_body);
16794
16795 pop_gimplify_context (bind);
16796
16797 gimple_bind_append_vars (bind, ctx->block_vars);
16798 BLOCK_VARS (block) = ctx->block_vars;
16799 if (BLOCK_VARS (block))
16800 TREE_USED (block) = 1;
16801 }
16802
16803 /* Lower code within an artificial GIMPLE_OMP_GRID_BODY OMP construct. */
16804
16805 static void
16806 lower_omp_grid_body (gimple_stmt_iterator *gsi_p, omp_context *ctx)
16807 {
16808 gimple *stmt = gsi_stmt (*gsi_p);
16809 lower_omp (gimple_omp_body_ptr (stmt), ctx);
16810 gimple_seq_add_stmt (gimple_omp_body_ptr (stmt),
16811 gimple_build_omp_return (false));
16812 }
16813
16814
16815 /* Callback for lower_omp_1. Return non-NULL if *tp needs to be
16816 regimplified. If DATA is non-NULL, lower_omp_1 is outside
16817 of OMP context, but with task_shared_vars set. */
16818
16819 static tree
16820 lower_omp_regimplify_p (tree *tp, int *walk_subtrees,
16821 void *data)
16822 {
16823 tree t = *tp;
16824
16825 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
16826 if (TREE_CODE (t) == VAR_DECL && data == NULL && DECL_HAS_VALUE_EXPR_P (t))
16827 return t;
16828
16829 if (task_shared_vars
16830 && DECL_P (t)
16831 && bitmap_bit_p (task_shared_vars, DECL_UID (t)))
16832 return t;
16833
16834 /* If a global variable has been privatized, TREE_CONSTANT on
16835 ADDR_EXPR might be wrong. */
16836 if (data == NULL && TREE_CODE (t) == ADDR_EXPR)
16837 recompute_tree_invariant_for_addr_expr (t);
16838
16839 *walk_subtrees = !IS_TYPE_OR_DECL_P (t);
16840 return NULL_TREE;
16841 }
16842
16843 /* Data to be communicated between lower_omp_regimplify_operands and
16844 lower_omp_regimplify_operands_p. */
16845
16846 struct lower_omp_regimplify_operands_data
16847 {
16848 omp_context *ctx;
16849 vec<tree> *decls;
16850 };
16851
16852 /* Helper function for lower_omp_regimplify_operands. Find
16853 omp_member_access_dummy_var vars and adjust temporarily their
16854 DECL_VALUE_EXPRs if needed. */
16855
16856 static tree
16857 lower_omp_regimplify_operands_p (tree *tp, int *walk_subtrees,
16858 void *data)
16859 {
16860 tree t = omp_member_access_dummy_var (*tp);
16861 if (t)
16862 {
16863 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
16864 lower_omp_regimplify_operands_data *ldata
16865 = (lower_omp_regimplify_operands_data *) wi->info;
16866 tree o = maybe_lookup_decl (t, ldata->ctx);
16867 if (o != t)
16868 {
16869 ldata->decls->safe_push (DECL_VALUE_EXPR (*tp));
16870 ldata->decls->safe_push (*tp);
16871 tree v = unshare_and_remap (DECL_VALUE_EXPR (*tp), t, o);
16872 SET_DECL_VALUE_EXPR (*tp, v);
16873 }
16874 }
16875 *walk_subtrees = !IS_TYPE_OR_DECL_P (*tp);
16876 return NULL_TREE;
16877 }
16878
16879 /* Wrapper around gimple_regimplify_operands that adjusts DECL_VALUE_EXPRs
16880 of omp_member_access_dummy_var vars during regimplification. */
16881
16882 static void
16883 lower_omp_regimplify_operands (omp_context *ctx, gimple *stmt,
16884 gimple_stmt_iterator *gsi_p)
16885 {
16886 auto_vec<tree, 10> decls;
16887 if (ctx)
16888 {
16889 struct walk_stmt_info wi;
16890 memset (&wi, '\0', sizeof (wi));
16891 struct lower_omp_regimplify_operands_data data;
16892 data.ctx = ctx;
16893 data.decls = &decls;
16894 wi.info = &data;
16895 walk_gimple_op (stmt, lower_omp_regimplify_operands_p, &wi);
16896 }
16897 gimple_regimplify_operands (stmt, gsi_p);
16898 while (!decls.is_empty ())
16899 {
16900 tree t = decls.pop ();
16901 tree v = decls.pop ();
16902 SET_DECL_VALUE_EXPR (t, v);
16903 }
16904 }
16905
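/* Lower the OMP construct (if any) in the statement pointed to by GSI_P,
   dispatching on its GIMPLE code.  CTX is the innermost enclosing OMP
   context, or NULL outside of any OMP context.  */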
16906 static void
16907 lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
16908 {
16909 gimple *stmt = gsi_stmt (*gsi_p);
16910 struct walk_stmt_info wi;
16911 gcall *call_stmt;
16912
16913 if (gimple_has_location (stmt))
16914 input_location = gimple_location (stmt);
16915
16916 if (task_shared_vars)
16917 memset (&wi, '\0', sizeof (wi));
16918
16919 /* If we have issued syntax errors, avoid doing any heavy lifting.
16920 Just replace the OMP directives with a NOP to avoid
16921 confusing RTL expansion. */
16922 if (seen_error () && is_gimple_omp (stmt))
16923 {
16924 gsi_replace (gsi_p, gimple_build_nop (), true);
16925 return;
16926 }
16927
16928 switch (gimple_code (stmt))
16929 {
16930 case GIMPLE_COND:
16931 {
16932 gcond *cond_stmt = as_a <gcond *> (stmt);
16933 if ((ctx || task_shared_vars)
16934 && (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
16935 lower_omp_regimplify_p,
16936 ctx ? NULL : &wi, NULL)
16937 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
16938 lower_omp_regimplify_p,
16939 ctx ? NULL : &wi, NULL)))
16940 lower_omp_regimplify_operands (ctx, cond_stmt, gsi_p);
16941 }
16942 break;
16943 case GIMPLE_CATCH:
16944 lower_omp (gimple_catch_handler_ptr (as_a <gcatch *> (stmt)), ctx);
16945 break;
16946 case GIMPLE_EH_FILTER:
16947 lower_omp (gimple_eh_filter_failure_ptr (stmt), ctx);
16948 break;
16949 case GIMPLE_TRY:
16950 lower_omp (gimple_try_eval_ptr (stmt), ctx);
16951 lower_omp (gimple_try_cleanup_ptr (stmt), ctx);
16952 break;
16953 case GIMPLE_TRANSACTION:
16954 lower_omp (gimple_transaction_body_ptr (
16955 as_a <gtransaction *> (stmt)),
16956 ctx);
16957 break;
16958 case GIMPLE_BIND:
16959 lower_omp (gimple_bind_body_ptr (as_a <gbind *> (stmt)), ctx);
16960 break;
16961 case GIMPLE_OMP_PARALLEL:
16962 case GIMPLE_OMP_TASK:
16963 ctx = maybe_lookup_ctx (stmt);
16964 gcc_assert (ctx);
16965 if (ctx->cancellable)
16966 ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
16967 lower_omp_taskreg (gsi_p, ctx);
16968 break;
16969 case GIMPLE_OMP_FOR:
16970 ctx = maybe_lookup_ctx (stmt);
16971 gcc_assert (ctx);
16972 if (ctx->cancellable)
16973 ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
16974 lower_omp_for (gsi_p, ctx);
16975 break;
16976 case GIMPLE_OMP_SECTIONS:
16977 ctx = maybe_lookup_ctx (stmt);
16978 gcc_assert (ctx);
16979 if (ctx->cancellable)
16980 ctx->cancel_label = create_artificial_label (UNKNOWN_LOCATION);
16981 lower_omp_sections (gsi_p, ctx);
16982 break;
16983 case GIMPLE_OMP_SINGLE:
16984 ctx = maybe_lookup_ctx (stmt);
16985 gcc_assert (ctx);
16986 lower_omp_single (gsi_p, ctx);
16987 break;
16988 case GIMPLE_OMP_MASTER:
16989 ctx = maybe_lookup_ctx (stmt);
16990 gcc_assert (ctx);
16991 lower_omp_master (gsi_p, ctx);
16992 break;
16993 case GIMPLE_OMP_TASKGROUP:
16994 ctx = maybe_lookup_ctx (stmt);
16995 gcc_assert (ctx);
16996 lower_omp_taskgroup (gsi_p, ctx);
16997 break;
16998 case GIMPLE_OMP_ORDERED:
16999 ctx = maybe_lookup_ctx (stmt);
17000 gcc_assert (ctx);
17001 lower_omp_ordered (gsi_p, ctx);
17002 break;
17003 case GIMPLE_OMP_CRITICAL:
17004 ctx = maybe_lookup_ctx (stmt);
17005 gcc_assert (ctx);
17006 lower_omp_critical (gsi_p, ctx);
17007 break;
17008 case GIMPLE_OMP_ATOMIC_LOAD:
17009 if ((ctx || task_shared_vars)
17010 && walk_tree (gimple_omp_atomic_load_rhs_ptr (
17011 as_a <gomp_atomic_load *> (stmt)),
17012 lower_omp_regimplify_p, ctx ? NULL : &wi, NULL))
17013 lower_omp_regimplify_operands (ctx, stmt, gsi_p);
17014 break;
17015 case GIMPLE_OMP_TARGET:
17016 ctx = maybe_lookup_ctx (stmt);
17017 gcc_assert (ctx);
17018 lower_omp_target (gsi_p, ctx);
17019 break;
17020 case GIMPLE_OMP_TEAMS:
17021 ctx = maybe_lookup_ctx (stmt);
17022 gcc_assert (ctx);
17023 lower_omp_teams (gsi_p, ctx);
17024 break;
17025 case GIMPLE_OMP_GRID_BODY:
17026 ctx = maybe_lookup_ctx (stmt);
17027 gcc_assert (ctx);
17028 lower_omp_grid_body (gsi_p, ctx);
17029 break;
17030 case GIMPLE_CALL:
17031 tree fndecl;
17032 call_stmt = as_a <gcall *> (stmt);
17033 fndecl = gimple_call_fndecl (call_stmt);
17034 if (fndecl
17035 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
17036 switch (DECL_FUNCTION_CODE (fndecl))
17037 {
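/* Inside a cancellable construct, GOMP_barrier is replaced by
   GOMP_barrier_cancel, and the result of the barrier / cancel /
   cancellation point call is tested so that control branches to the
   construct's cancel label when cancellation has been observed.  */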
17038 case BUILT_IN_GOMP_BARRIER:
17039 if (ctx == NULL)
17040 break;
17041 /* FALLTHRU */
17042 case BUILT_IN_GOMP_CANCEL:
17043 case BUILT_IN_GOMP_CANCELLATION_POINT:
17044 omp_context *cctx;
17045 cctx = ctx;
17046 if (gimple_code (cctx->stmt) == GIMPLE_OMP_SECTION)
17047 cctx = cctx->outer;
17048 gcc_assert (gimple_call_lhs (call_stmt) == NULL_TREE);
17049 if (!cctx->cancellable)
17050 {
17051 if (DECL_FUNCTION_CODE (fndecl)
17052 == BUILT_IN_GOMP_CANCELLATION_POINT)
17053 {
17054 stmt = gimple_build_nop ();
17055 gsi_replace (gsi_p, stmt, false);
17056 }
17057 break;
17058 }
17059 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_GOMP_BARRIER)
17060 {
17061 fndecl = builtin_decl_explicit (BUILT_IN_GOMP_BARRIER_CANCEL);
17062 gimple_call_set_fndecl (call_stmt, fndecl);
17063 gimple_call_set_fntype (call_stmt, TREE_TYPE (fndecl));
17064 }
17065 tree lhs;
17066 lhs = create_tmp_var (TREE_TYPE (TREE_TYPE (fndecl)));
17067 gimple_call_set_lhs (call_stmt, lhs);
17068 tree fallthru_label;
17069 fallthru_label = create_artificial_label (UNKNOWN_LOCATION);
17070 gimple *g;
17071 g = gimple_build_label (fallthru_label);
17072 gsi_insert_after (gsi_p, g, GSI_SAME_STMT);
17073 g = gimple_build_cond (NE_EXPR, lhs,
17074 fold_convert (TREE_TYPE (lhs),
17075 boolean_false_node),
17076 cctx->cancel_label, fallthru_label);
17077 gsi_insert_after (gsi_p, g, GSI_SAME_STMT);
17078 break;
17079 default:
17080 break;
17081 }
17082 /* FALLTHRU */
17083 default:
17084 if ((ctx || task_shared_vars)
17085 && walk_gimple_op (stmt, lower_omp_regimplify_p,
17086 ctx ? NULL : &wi))
17087 {
17088 /* Just remove clobbers; this should happen only if we have
17089 "privatized" local addressable variables in SIMD regions.
17090 The clobber isn't needed in that case, and gimplifying the
17091 address of the ARRAY_REF into a pointer and creating a MEM_REF
17092 based clobber would create worse code than we get with the
17093 clobber dropped. */
17094 if (gimple_clobber_p (stmt))
17095 {
17096 gsi_replace (gsi_p, gimple_build_nop (), true);
17097 break;
17098 }
17099 lower_omp_regimplify_operands (ctx, stmt, gsi_p);
17100 }
17101 break;
17102 }
17103 }
17104
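/* Lower all statements in the sequence *BODY within OMP context CTX, and
   fold the statements afterwards when inside an offloading or taskreg
   region, where folding was deferred during gimplification.  */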
17105 static void
17106 lower_omp (gimple_seq *body, omp_context *ctx)
17107 {
17108 location_t saved_location = input_location;
17109 gimple_stmt_iterator gsi;
17110 for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
17111 lower_omp_1 (&gsi, ctx);
17112 /* During gimplification, we haven't folded statements inside offloading
17113 or taskreg regions (gimplify.c:maybe_fold_stmt); do that now. */
17114 if (target_nesting_level || taskreg_nesting_level)
17115 for (gsi = gsi_start (*body); !gsi_end_p (gsi); gsi_next (&gsi))
17116 fold_stmt (&gsi);
17117 input_location = saved_location;
17118 }
17119
17120 /* Return true if STMT is an assignment of a register-type value into a
17121 local VAR_DECL. */
17122
17123 static bool
17124 grid_reg_assignment_to_local_var_p (gimple *stmt)
17125 {
17126 gassign *assign = dyn_cast <gassign *> (stmt);
17127 if (!assign)
17128 return false;
17129 tree lhs = gimple_assign_lhs (assign);
17130 if (TREE_CODE (lhs) != VAR_DECL
17131 || !is_gimple_reg_type (TREE_TYPE (lhs))
17132 || is_global_var (lhs))
17133 return false;
17134 return true;
17135 }
17136
17137 /* Return true if all statements in SEQ are assignments to local register-type
17138 variables. */
17139
17140 static bool
17141 grid_seq_only_contains_local_assignments (gimple_seq seq)
17142 {
17143 if (!seq)
17144 return true;
17145
17146 gimple_stmt_iterator gsi;
17147 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
17148 if (!grid_reg_assignment_to_local_var_p (gsi_stmt (gsi)))
17149 return false;
17150 return true;
17151 }
17152
17153 /* Scan statements in SEQ and call itself recursively on any bind. If during
17154 the whole search only assignments to register-type local variables and at
17155 most one OMP statement are encountered, return true, otherwise return false.
17156 RET is where we store any OMP statement encountered. TARGET_LOC and NAME
17157 are used for dumping a note about a failure. */
17158
17159 static bool
17160 grid_find_single_omp_among_assignments_1 (gimple_seq seq, location_t target_loc,
17161 const char *name, gimple **ret)
17162 {
17163 gimple_stmt_iterator gsi;
17164 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
17165 {
17166 gimple *stmt = gsi_stmt (gsi);
17167
17168 if (grid_reg_assignment_to_local_var_p (stmt))
17169 continue;
17170 if (gbind *bind = dyn_cast <gbind *> (stmt))
17171 {
17172 if (!grid_find_single_omp_among_assignments_1 (gimple_bind_body (bind),
17173 target_loc, name, ret))
17174 return false;
17175 }
17176 else if (is_gimple_omp (stmt))
17177 {
17178 if (*ret)
17179 {
17180 if (dump_enabled_p ())
17181 dump_printf_loc (MSG_NOTE, target_loc,
17182 "Will not turn target construct into a simple "
17183 "GPGPU kernel because %s construct contains "
17184 "multiple OpenMP constructs\n", name);
17185 return false;
17186 }
17187 *ret = stmt;
17188 }
17189 else
17190 {
17191 if (dump_enabled_p ())
17192 dump_printf_loc (MSG_NOTE, target_loc,
17193 "Will not turn target construct into a simple "
17194 "GPGPU kernel because %s construct contains "
17195 "a complex statement\n", name);
17196 return false;
17197 }
17198 }
17199 return true;
17200 }
17201
17202 /* Scan statements in SEQ and make sure that it and any binds in it contain
17203 only assignments to local register-type variables and one OMP construct. If
17204 so, return that construct, otherwise return NULL. If dumping is enabled and
17205 function fails, use TARGET_LOC and NAME to dump a note with the reason for
17206 failure. */
17207
17208 static gimple *
17209 grid_find_single_omp_among_assignments (gimple_seq seq, location_t target_loc,
17210 const char *name)
17211 {
17212 if (!seq)
17213 {
17214 if (dump_enabled_p ())
17215 dump_printf_loc (MSG_NOTE, target_loc,
17216 "Will not turn target construct into a simple "
17217 "GPGPU kernel because %s construct has empty "
17218 "body\n",
17219 name);
17220 return NULL;
17221 }
17222
17223 gimple *ret = NULL;
17224 if (grid_find_single_omp_among_assignments_1 (seq, target_loc, name, &ret))
17225 {
17226 if (!ret && dump_enabled_p ())
17227 dump_printf_loc (MSG_NOTE, target_loc,
17228 "Will not turn target construct into a simple "
17229 "GPGPU kernel because %s construct does not contain"
17230 "any other OpenMP construct\n", name);
17231 return ret;
17232 }
17233 else
17234 return NULL;
17235 }
17236
17237 /* Walker function looking for statements that there is no point in gridifying
17238 (and for noreturn function calls, which we cannot handle). Return non-NULL
17239 if such a statement is found. */
17240
17241 static tree
17242 grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
17243 bool *handled_ops_p,
17244 struct walk_stmt_info *)
17245 {
17246 *handled_ops_p = false;
17247 gimple *stmt = gsi_stmt (*gsi);
17248 switch (gimple_code (stmt))
17249 {
17250 case GIMPLE_CALL:
17251 if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
17252 {
17253 *handled_ops_p = true;
17254 return error_mark_node;
17255 }
17256 break;
17257
17258 /* We may reduce the following list if we find a way to implement the
17259 clauses, but for now there is no point in trying further. */
17260 case GIMPLE_OMP_CRITICAL:
17261 case GIMPLE_OMP_TASKGROUP:
17262 case GIMPLE_OMP_TASK:
17263 case GIMPLE_OMP_SECTION:
17264 case GIMPLE_OMP_SECTIONS:
17265 case GIMPLE_OMP_SECTIONS_SWITCH:
17266 case GIMPLE_OMP_TARGET:
17267 case GIMPLE_OMP_ORDERED:
17268 *handled_ops_p = true;
17269 return error_mark_node;
17270
17271 default:
17272 break;
17273 }
17274 return NULL;
17275 }
17276
17277
17278 /* If TARGET follows a pattern that can be turned into a gridified GPGPU
17279 kernel, return true, otherwise return false. In the case of success, also
17280 fill in GROUP_SIZE_P with the requested group size or NULL if there is
17281 none. */
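/* Roughly, the accepted pattern is a sole nest of
   target { teams { distribute { parallel { for } } } } constructs,
   separated only by assignments to local register-type variables and
   carrying only clauses that the gridification code knows how to handle.  */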
17282
17283 static bool
17284 grid_target_follows_gridifiable_pattern (gomp_target *target, tree *group_size_p)
17285 {
17286 if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
17287 return false;
17288
17289 location_t tloc = gimple_location (target);
17290 gimple *stmt
17291 = grid_find_single_omp_among_assignments (gimple_omp_body (target),
17292 tloc, "target");
17293 if (!stmt)
17294 return false;
17295 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
17296 tree group_size = NULL;
17297 if (!teams)
17298 {
17299 dump_printf_loc (MSG_NOTE, tloc,
17300 "Will not turn target construct into a simple "
17301 "GPGPU kernel because it does not have a sole teams "
17302 "construct in it.\n");
17303 return false;
17304 }
17305
17306 tree clauses = gimple_omp_teams_clauses (teams);
17307 while (clauses)
17308 {
17309 switch (OMP_CLAUSE_CODE (clauses))
17310 {
17311 case OMP_CLAUSE_NUM_TEAMS:
17312 if (dump_enabled_p ())
17313 dump_printf_loc (MSG_NOTE, tloc,
17314 "Will not turn target construct into a "
17315 "gridified GPGPU kernel because we cannot "
17316 "handle num_teams clause of teams "
17317 "construct\n ");
17318 return false;
17319
17320 case OMP_CLAUSE_REDUCTION:
17321 if (dump_enabled_p ())
17322 dump_printf_loc (MSG_NOTE, tloc,
17323 "Will not turn target construct into a "
17324 "gridified GPGPU kernel because a reduction "
17325 "clause is present\n ");
17326 return false;
17327
17328 case OMP_CLAUSE_LASTPRIVATE:
17329 if (dump_enabled_p ())
17330 dump_printf_loc (MSG_NOTE, tloc,
17331 "Will not turn target construct into a "
17332 "gridified GPGPU kernel because a lastprivate "
17333 "clause is present\n ");
17334 return false;
17335
17336 case OMP_CLAUSE_THREAD_LIMIT:
17337 group_size = OMP_CLAUSE_OPERAND (clauses, 0);
17338 break;
17339
17340 default:
17341 break;
17342 }
17343 clauses = OMP_CLAUSE_CHAIN (clauses);
17344 }
17345
17346 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), tloc,
17347 "teams");
17348 if (!stmt)
17349 return false;
17350 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
17351 if (!dist)
17352 {
17353 dump_printf_loc (MSG_NOTE, tloc,
17354 "Will not turn target construct into a simple "
17355 "GPGPU kernel because the teams construct does not have "
17356 "a sole distribute construct in it.\n");
17357 return false;
17358 }
17359
17360 gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
17361 if (!gimple_omp_for_combined_p (dist))
17362 {
17363 if (dump_enabled_p ())
17364 dump_printf_loc (MSG_NOTE, tloc,
17365 "Will not turn target construct into a gridified GPGPU "
17366 "kernel because we cannot handle a standalone "
17367 "distribute construct\n ");
17368 return false;
17369 }
17370 if (dist->collapse > 1)
17371 {
17372 if (dump_enabled_p ())
17373 dump_printf_loc (MSG_NOTE, tloc,
17374 "Will not turn target construct into a gridified GPGPU "
17375 "kernel because the distribute construct contains "
17376 "collapse clause\n");
17377 return false;
17378 }
17379 struct omp_for_data fd;
17380 extract_omp_for_data (dist, &fd, NULL);
17381 if (fd.chunk_size)
17382 {
17383 if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
17384 {
17385 if (dump_enabled_p ())
17386 dump_printf_loc (MSG_NOTE, tloc,
17387 "Will not turn target construct into a "
17388 "gridified GPGPU kernel because the teams "
17389 "thread limit is different from distribute "
17390 "schedule chunk\n");
17391 return false;
17392 }
17393 group_size = fd.chunk_size;
17394 }
17395 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist), tloc,
17396 "distribute");
17397 gomp_parallel *par;
17398 if (!stmt || !(par = dyn_cast <gomp_parallel *> (stmt)))
17399 return false;
17400
17401 clauses = gimple_omp_parallel_clauses (par);
17402 while (clauses)
17403 {
17404 switch (OMP_CLAUSE_CODE (clauses))
17405 {
17406 case OMP_CLAUSE_NUM_THREADS:
17407 if (dump_enabled_p ())
17408 dump_printf_loc (MSG_NOTE, tloc,
17409 "Will not turn target construct into a gridified"
17410 "GPGPU kernel because there is a num_threads "
17411 "clause of the parallel construct\n");
17412 return false;
17413
17414 case OMP_CLAUSE_REDUCTION:
17415 if (dump_enabled_p ())
17416 dump_printf_loc (MSG_NOTE, tloc,
17417 "Will not turn target construct into a "
17418 "gridified GPGPU kernel because a reduction "
17419 "clause is present\n ");
17420 return false;
17421
17422 case OMP_CLAUSE_LASTPRIVATE:
17423 if (dump_enabled_p ())
17424 dump_printf_loc (MSG_NOTE, tloc,
17425 "Will not turn target construct into a "
17426 "gridified GPGPU kernel because a lastprivate "
17427 "clause is present\n ");
17428 return false;
17429
17430 default:
17431 break;
17432 }
17433 clauses = OMP_CLAUSE_CHAIN (clauses);
17434 }
17435
17436 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), tloc,
17437 "parallel");
17438 gomp_for *gfor;
17439 if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
17440 return false;
17441
17442 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
17443 {
17444 if (dump_enabled_p ())
17445 dump_printf_loc (MSG_NOTE, tloc,
17446 "Will not turn target construct into a gridified GPGPU "
17447 "kernel because the inner loop is not a simple for "
17448 "loop\n");
17449 return false;
17450 }
17451 if (gfor->collapse > 1)
17452 {
17453 if (dump_enabled_p ())
17454 dump_printf_loc (MSG_NOTE, tloc,
17455 "Will not turn target construct into a gridified GPGPU "
17456 "kernel because the inner loop contains collapse "
17457 "clause\n");
17458 return false;
17459 }
17460
17461 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor)))
17462 {
17463 if (dump_enabled_p ())
17464 dump_printf_loc (MSG_NOTE, tloc,
17465 "Will not turn target construct into a gridified GPGPU "
17466 "kernel because the inner loop pre_body contains"
17467 "a complex instruction\n");
17468 return false;
17469 }
17470
17471 clauses = gimple_omp_for_clauses (gfor);
17472 while (clauses)
17473 {
17474 switch (OMP_CLAUSE_CODE (clauses))
17475 {
17476 case OMP_CLAUSE_SCHEDULE:
17477 if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
17478 {
17479 if (dump_enabled_p ())
17480 dump_printf_loc (MSG_NOTE, tloc,
17481 "Will not turn target construct into a "
17482 "gridified GPGPU kernel because the inner "
17483 "loop has a non-automatic scheduling clause\n");
17484 return false;
17485 }
17486 break;
17487
17488 case OMP_CLAUSE_REDUCTION:
17489 if (dump_enabled_p ())
17490 dump_printf_loc (MSG_NOTE, tloc,
17491 "Will not turn target construct into a "
17492 "gridified GPGPU kernel because a reduction "
17493 "clause is present\n ");
17494 return false;
17495
17496 case OMP_CLAUSE_LASTPRIVATE:
17497 if (dump_enabled_p ())
17498 dump_printf_loc (MSG_NOTE, tloc,
17499 "Will not turn target construct into a "
17500 "gridified GPGPU kernel because a lastprivate "
17501 "clause is present\n ");
17502 return false;
17503
17504 default:
17505 break;
17506 }
17507 clauses = OMP_CLAUSE_CHAIN (clauses);
17508 }
17509
17510 struct walk_stmt_info wi;
17511 memset (&wi, 0, sizeof (wi));
17512 if (gimple *bad = walk_gimple_seq (gimple_omp_body (gfor),
17513 grid_find_ungridifiable_statement,
17514 NULL, &wi))
17515 {
17516 if (dump_enabled_p ())
17517 {
17518 if (is_gimple_call (bad))
17519 dump_printf_loc (MSG_NOTE, tloc,
17520 "Will not turn target construct into a gridified "
17521 " GPGPU kernel because the inner loop contains "
17522 "call to a noreturn function\n");
17523 else
17524 dump_printf_loc (MSG_NOTE, tloc,
17525 "Will not turn target construct into a gridified "
17526 "GPGPU kernel because the inner loop contains "
17527 "statement %s which cannot be transformed\n",
17528 gimple_code_name[(int) gimple_code (bad)]);
17529 }
17530 return false;
17531 }
17532
17533 *group_size_p = group_size;
17534 return true;
17535 }
17536
17537 /* Operand walker, used to remap pre-body declarations according to a hash map
17538 provided in DATA. */
17539
17540 static tree
17541 grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
17542 {
17543 tree t = *tp;
17544
17545 if (DECL_P (t) || TYPE_P (t))
17546 *walk_subtrees = 0;
17547 else
17548 *walk_subtrees = 1;
17549
17550 if (TREE_CODE (t) == VAR_DECL)
17551 {
17552 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
17553 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
17554 tree *repl = declmap->get (t);
17555 if (repl)
17556 *tp = *repl;
17557 }
17558 return NULL_TREE;
17559 }
17560
17561 /* Copy leading register-type assignments to local variables in SRC to just
17562 before DST, creating temporaries, adjusting the mapping of operands in WI and
17563 remapping operands as necessary. Add any new temporaries to TGT_BIND.
17564 Return the first statement that does not conform to
17565 grid_reg_assignment_to_local_var_p or NULL. */
17566
17567 static gimple *
17568 grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
17569 gbind *tgt_bind, struct walk_stmt_info *wi)
17570 {
17571 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
17572 gimple_stmt_iterator gsi;
17573 for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
17574 {
17575 gimple *stmt = gsi_stmt (gsi);
17576 if (gbind *bind = dyn_cast <gbind *> (stmt))
17577 {
17578 gimple *r = grid_copy_leading_local_assignments
17579 (gimple_bind_body (bind), dst, tgt_bind, wi);
17580 if (r)
17581 return r;
17582 else
17583 continue;
17584 }
17585 if (!grid_reg_assignment_to_local_var_p (stmt))
17586 return stmt;
17587 tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
17588 tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
17589 TREE_TYPE (lhs));
17590 DECL_CONTEXT (repl) = current_function_decl;
17591 gimple_bind_append_vars (tgt_bind, repl);
17592
17593 declmap->put (lhs, repl);
17594 gassign *copy = as_a <gassign *> (gimple_copy (stmt));
17595 walk_gimple_op (copy, grid_remap_prebody_decls, wi);
17596 gsi_insert_before (dst, copy, GSI_SAME_STMT);
17597 }
17598 return NULL;
17599 }
17600
17601 /* Given freshly copied top level kernel SEQ, identify the individual OMP
17602 components, mark them as part of the kernel, return the inner loop, and copy
17603 the assignments leading up to them to just before DST, remapping them using
17604 WI and adding new temporaries to TGT_BIND. */
17605
17606 static gomp_for *
17607 grid_process_kernel_body_copy (gimple_seq seq, gimple_stmt_iterator *dst,
17608 gbind *tgt_bind, struct walk_stmt_info *wi)
17609 {
17610 gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, wi);
17611 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
17612 gcc_assert (teams);
17613 gimple_omp_teams_set_grid_phony (teams, true);
17614 stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
17615 tgt_bind, wi);
17616 gcc_checking_assert (stmt);
17617 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
17618 gcc_assert (dist);
17619 gimple_seq prebody = gimple_omp_for_pre_body (dist);
17620 if (prebody)
17621 grid_copy_leading_local_assignments (prebody, dst, tgt_bind, wi);
17622 gimple_omp_for_set_grid_phony (dist, true);
17623 stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
17624 tgt_bind, wi);
17625 gcc_checking_assert (stmt);
17626
17627 gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
17628 gimple_omp_parallel_set_grid_phony (parallel, true);
17629 stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), dst,
17630 tgt_bind, wi);
17631 gomp_for *inner_loop = as_a <gomp_for *> (stmt);
17632 gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
17633 prebody = gimple_omp_for_pre_body (inner_loop);
17634 if (prebody)
17635 grid_copy_leading_local_assignments (prebody, dst, tgt_bind, wi);
17636
17637 return inner_loop;
17638 }
17639
17640 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
17641 create a GPU kernel for it. GSI must point to the same statement, and TGT_BIND
17642 is the bind into which temporaries inserted before TARGET should be
17643 added. */
17644
17645 static void
17646 grid_attempt_target_gridification (gomp_target *target,
17647 gimple_stmt_iterator *gsi,
17648 gbind *tgt_bind)
17649 {
17650 tree group_size;
17651 if (!target || !grid_target_follows_gridifiable_pattern (target, &group_size))
17652 return;
17653
17654 location_t loc = gimple_location (target);
17655 if (dump_enabled_p ())
17656 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
17657 "Target construct will be turned into a gridified GPGPU "
17658 "kernel\n");
17659
17660 /* Copy target body to a GPUKERNEL construct: */
17661 gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
17662 (gimple_omp_body (target));
17663
17664 hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
17665 struct walk_stmt_info wi;
17666 memset (&wi, 0, sizeof (struct walk_stmt_info));
17667 wi.info = declmap;
17668
17669 /* Copy assignments in between OMP statements before target, and mark the OMP
17670 statements within the copy appropriately. */
17671 gomp_for *inner_loop = grid_process_kernel_body_copy (kernel_seq, gsi,
17672 tgt_bind, &wi);
17673
17674 gbind *old_bind = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
17675 gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
17676 tree new_block = gimple_bind_block (new_bind);
17677 tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
17678 BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
17679 BLOCK_SUBBLOCKS (enc_block) = new_block;
17680 BLOCK_SUPERCONTEXT (new_block) = enc_block;
17681 gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
17682 gimple_seq_add_stmt
17683 (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
17684 gpukernel);
17685
17686 walk_tree (&group_size, grid_remap_prebody_decls, &wi, NULL);
17687 push_gimplify_context ();
17688 size_t collapse = gimple_omp_for_collapse (inner_loop);
17689 for (size_t i = 0; i < collapse; i++)
17690 {
17691 tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
17692 if (POINTER_TYPE_P (type))
17693 itype = signed_type_for (type);
17694 else
17695 itype = type;
17696
17697 enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
17698 tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
17699 walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
17700 tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
17701 walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
17702 adjust_for_condition (loc, &cond_code, &n2);
17703 tree step;
17704 step = get_omp_for_step_from_incr (loc,
17705 gimple_omp_for_incr (inner_loop, i));
17706 gimple_seq tmpseq = NULL;
17707 n1 = fold_convert (itype, n1);
17708 n2 = fold_convert (itype, n2);
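/* The grid size of this dimension is the iteration count of the loop,
   i.e. (N2 - N1 + STEP + (COND == LT ? -1 : 1)) / STEP, with both
   operands negated first for unsigned types iterating downwards.  */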
17709 tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
17710 t = fold_build2 (PLUS_EXPR, itype, step, t);
17711 t = fold_build2 (PLUS_EXPR, itype, t, n2);
17712 t = fold_build2 (MINUS_EXPR, itype, t, n1);
17713 if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
17714 t = fold_build2 (TRUNC_DIV_EXPR, itype,
17715 fold_build1 (NEGATE_EXPR, itype, t),
17716 fold_build1 (NEGATE_EXPR, itype, step));
17717 else
17718 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
17719 tree gs = fold_convert (uint32_type_node, t);
17720 gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
17721 if (!gimple_seq_empty_p (tmpseq))
17722 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
17723
17724 tree ws;
17725 if (i == 0 && group_size)
17726 {
17727 ws = fold_convert (uint32_type_node, group_size);
17728 tmpseq = NULL;
17729 gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
17730 if (!gimple_seq_empty_p (tmpseq))
17731 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
17732 }
17733 else
17734 ws = build_zero_cst (uint32_type_node);
17735
17736 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
17737 OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
17738 OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
17739 OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
17740 OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
17741 gimple_omp_target_set_clauses (target, c);
17742 }
17743 pop_gimplify_context (tgt_bind);
17744 delete declmap;
17745 return;
17746 }
17747
17748 /* Walker function doing all the work for grid_gridify_all_targets. */
17749
17750 static tree
17751 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
17752 bool *handled_ops_p,
17753 struct walk_stmt_info *incoming)
17754 {
17755 *handled_ops_p = false;
17756
17757 gimple *stmt = gsi_stmt (*gsi);
17758 gomp_target *target = dyn_cast <gomp_target *> (stmt);
17759 if (target)
17760 {
17761 gbind *tgt_bind = (gbind *) incoming->info;
17762 gcc_checking_assert (tgt_bind);
17763 grid_attempt_target_gridification (target, gsi, tgt_bind);
17764 return NULL_TREE;
17765 }
17766 gbind *bind = dyn_cast <gbind *> (stmt);
17767 if (bind)
17768 {
17769 *handled_ops_p = true;
17770 struct walk_stmt_info wi;
17771 memset (&wi, 0, sizeof (wi));
17772 wi.info = bind;
17773 walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
17774 grid_gridify_all_targets_stmt, NULL, &wi);
17775 }
17776 return NULL_TREE;
17777 }
17778
17779 /* Attempt to gridify all target constructs in BODY_P. All such targets will
17780 have their bodies duplicated, with the new copy being put into a
17781 gimple_omp_grid_body statement. All kernel-related constructs within the
17782 grid_body will be marked with phony flags or kernel kinds. Moreover, some
17783 re-structuring is often needed, such as copying pre-bodies before the target
17784 construct so that kernel grid sizes can be computed. */
17785
17786 static void
17787 grid_gridify_all_targets (gimple_seq *body_p)
17788 {
17789 struct walk_stmt_info wi;
17790 memset (&wi, 0, sizeof (wi));
17791 walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
17792 }
17793 \f
17794
17795 /* Main entry point. */
17796
17797 static unsigned int
17798 execute_lower_omp (void)
17799 {
17800 gimple_seq body;
17801 int i;
17802 omp_context *ctx;
17803
17804 /* This pass always runs, to provide PROP_gimple_lomp.
17805 But often, there is nothing to do. */
17806 if (flag_cilkplus == 0 && flag_openacc == 0 && flag_openmp == 0
17807 && flag_openmp_simd == 0)
17808 return 0;
17809
17810 all_contexts = splay_tree_new (splay_tree_compare_pointers, 0,
17811 delete_omp_context);
17812
17813 body = gimple_body (current_function_decl);
17814
17815 if (hsa_gen_requested_p ())
17816 grid_gridify_all_targets (&body);
17817
17818 scan_omp (&body, NULL);
17819 gcc_assert (taskreg_nesting_level == 0);
17820 FOR_EACH_VEC_ELT (taskreg_contexts, i, ctx)
17821 finish_taskreg_scan (ctx);
17822 taskreg_contexts.release ();
17823
17824 if (all_contexts->root)
17825 {
17826 if (task_shared_vars)
17827 push_gimplify_context ();
17828 lower_omp (&body, NULL);
17829 if (task_shared_vars)
17830 pop_gimplify_context (NULL);
17831 }
17832
17833 if (all_contexts)
17834 {
17835 splay_tree_delete (all_contexts);
17836 all_contexts = NULL;
17837 }
17838 BITMAP_FREE (task_shared_vars);
17839 return 0;
17840 }
17841
17842 namespace {
17843
17844 const pass_data pass_data_lower_omp =
17845 {
17846 GIMPLE_PASS, /* type */
17847 "omplower", /* name */
17848 OPTGROUP_NONE, /* optinfo_flags */
17849 TV_NONE, /* tv_id */
17850 PROP_gimple_any, /* properties_required */
17851 PROP_gimple_lomp, /* properties_provided */
17852 0, /* properties_destroyed */
17853 0, /* todo_flags_start */
17854 0, /* todo_flags_finish */
17855 };
17856
17857 class pass_lower_omp : public gimple_opt_pass
17858 {
17859 public:
17860 pass_lower_omp (gcc::context *ctxt)
17861 : gimple_opt_pass (pass_data_lower_omp, ctxt)
17862 {}
17863
17864 /* opt_pass methods: */
17865 virtual unsigned int execute (function *) { return execute_lower_omp (); }
17866
17867 }; // class pass_lower_omp
17868
17869 } // anon namespace
17870
17871 gimple_opt_pass *
17872 make_pass_lower_omp (gcc::context *ctxt)
17873 {
17874 return new pass_lower_omp (ctxt);
17875 }
17876 \f
17877 /* The following is a utility to diagnose structured block violations.
17878 It is not part of the "omplower" pass, as that's invoked too late. It
17879 should be invoked by the respective front ends after gimplification. */
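/* For instance (an illustrative sketch), a jump into an OpenMP structured
   block such as

     goto inside;
     #pragma omp parallel
     {
     inside: ;
     }

   is reported as an invalid entry by the two passes below.  */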
17880
17881 static splay_tree all_labels;
17882
17883 /* Check for mismatched contexts and generate an error if needed. Return
17884 true if an error is detected. */
17885
17886 static bool
17887 diagnose_sb_0 (gimple_stmt_iterator *gsi_p,
17888 gimple *branch_ctx, gimple *label_ctx)
17889 {
17890 gcc_checking_assert (!branch_ctx || is_gimple_omp (branch_ctx));
17891 gcc_checking_assert (!label_ctx || is_gimple_omp (label_ctx));
17892
17893 if (label_ctx == branch_ctx)
17894 return false;
17895
17896 const char* kind = NULL;
17897
17898 if (flag_cilkplus)
17899 {
17900 if ((branch_ctx
17901 && gimple_code (branch_ctx) == GIMPLE_OMP_FOR
17902 && gimple_omp_for_kind (branch_ctx) == GF_OMP_FOR_KIND_CILKSIMD)
17903 || (label_ctx
17904 && gimple_code (label_ctx) == GIMPLE_OMP_FOR
17905 && gimple_omp_for_kind (label_ctx) == GF_OMP_FOR_KIND_CILKSIMD))
17906 kind = "Cilk Plus";
17907 }
17908 if (flag_openacc)
17909 {
17910 if ((branch_ctx && is_gimple_omp_oacc (branch_ctx))
17911 || (label_ctx && is_gimple_omp_oacc (label_ctx)))
17912 {
17913 gcc_checking_assert (kind == NULL);
17914 kind = "OpenACC";
17915 }
17916 }
17917 if (kind == NULL)
17918 {
17919 gcc_checking_assert (flag_openmp);
17920 kind = "OpenMP";
17921 }
17922
17923 /*
17924 Previously we kept track of the label's entire context in diagnose_sb_[12]
17925 so we could traverse it and issue a correct "exit" or "enter" error
17926 message upon a structured block violation.
17927
17928 We built the context by building a list with tree_cons'ing, but there is
17929 no easy counterpart in gimple tuples. It seems like far too much work
17930 for issuing exit/enter error messages. If someone really misses the
17931 distinct error message... patches welcome.
17932 */
17933
17934 #if 0
17935 /* Try to avoid confusing the user by producing an error message
17936 with correct "exit" or "enter" verbiage. We prefer "exit"
17937 unless we can show that LABEL_CTX is nested within BRANCH_CTX. */
17938 if (branch_ctx == NULL)
17939 exit_p = false;
17940 else
17941 {
17942 while (label_ctx)
17943 {
17944 if (TREE_VALUE (label_ctx) == branch_ctx)
17945 {
17946 exit_p = false;
17947 break;
17948 }
17949 label_ctx = TREE_CHAIN (label_ctx);
17950 }
17951 }
17952
17953 if (exit_p)
17954 error ("invalid exit from %s structured block", kind);
17955 else
17956 error ("invalid entry to %s structured block", kind);
17957 #endif
17958
17959 /* If it's obvious we have an invalid entry, be specific about the error. */
17960 if (branch_ctx == NULL)
17961 error ("invalid entry to %s structured block", kind);
17962 else
17963 {
17964 /* Otherwise, be vague and lazy, but efficient. */
17965 error ("invalid branch to/from %s structured block", kind);
17966 }
17967
17968 gsi_replace (gsi_p, gimple_build_nop (), false);
17969 return true;
17970 }
17971
17972 /* Pass 1: Create a minimal tree of structured blocks, and record
17973 where each label is found. */
17974
17975 static tree
17976 diagnose_sb_1 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
17977 struct walk_stmt_info *wi)
17978 {
17979 gimple *context = (gimple *) wi->info;
17980 gimple *inner_context;
17981 gimple *stmt = gsi_stmt (*gsi_p);
17982
17983 *handled_ops_p = true;
17984
17985 switch (gimple_code (stmt))
17986 {
17987 WALK_SUBSTMTS;
17988
17989 case GIMPLE_OMP_PARALLEL:
17990 case GIMPLE_OMP_TASK:
17991 case GIMPLE_OMP_SECTIONS:
17992 case GIMPLE_OMP_SINGLE:
17993 case GIMPLE_OMP_SECTION:
17994 case GIMPLE_OMP_MASTER:
17995 case GIMPLE_OMP_ORDERED:
17996 case GIMPLE_OMP_CRITICAL:
17997 case GIMPLE_OMP_TARGET:
17998 case GIMPLE_OMP_TEAMS:
17999 case GIMPLE_OMP_TASKGROUP:
18000 /* The minimal context here is just the current OMP construct. */
18001 inner_context = stmt;
18002 wi->info = inner_context;
18003 walk_gimple_seq (gimple_omp_body (stmt), diagnose_sb_1, NULL, wi);
18004 wi->info = context;
18005 break;
18006
18007 case GIMPLE_OMP_FOR:
18008 inner_context = stmt;
18009 wi->info = inner_context;
18010 /* gimple_omp_for_{index,initial,final} are all DECLs; no need to
18011 walk them. */
18012 walk_gimple_seq (gimple_omp_for_pre_body (stmt),
18013 diagnose_sb_1, NULL, wi);
18014 walk_gimple_seq (gimple_omp_body (stmt), diagnose_sb_1, NULL, wi);
18015 wi->info = context;
18016 break;
18017
18018 case GIMPLE_LABEL:
18019 splay_tree_insert (all_labels,
18020 (splay_tree_key) gimple_label_label (
18021 as_a <glabel *> (stmt)),
18022 (splay_tree_value) context);
18023 break;
18024
18025 default:
18026 break;
18027 }
18028
18029 return NULL_TREE;
18030 }
18031
18032 /* Pass 2: Check each branch and see if its context differs from the
18033 context of the destination label. */
18034
18035 static tree
18036 diagnose_sb_2 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p,
18037 struct walk_stmt_info *wi)
18038 {
18039 gimple *context = (gimple *) wi->info;
18040 splay_tree_node n;
18041 gimple *stmt = gsi_stmt (*gsi_p);
18042
18043 *handled_ops_p = true;
18044
18045 switch (gimple_code (stmt))
18046 {
18047 WALK_SUBSTMTS;
18048
18049 case GIMPLE_OMP_PARALLEL:
18050 case GIMPLE_OMP_TASK:
18051 case GIMPLE_OMP_SECTIONS:
18052 case GIMPLE_OMP_SINGLE:
18053 case GIMPLE_OMP_SECTION:
18054 case GIMPLE_OMP_MASTER:
18055 case GIMPLE_OMP_ORDERED:
18056 case GIMPLE_OMP_CRITICAL:
18057 case GIMPLE_OMP_TARGET:
18058 case GIMPLE_OMP_TEAMS:
18059 case GIMPLE_OMP_TASKGROUP:
18060 wi->info = stmt;
18061 walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), diagnose_sb_2, NULL, wi);
18062 wi->info = context;
18063 break;
18064
18065 case GIMPLE_OMP_FOR:
18066 wi->info = stmt;
18067 /* gimple_omp_for_{index,initial,final} are all DECLs; no need to
18068 walk them. */
18069 walk_gimple_seq_mod (gimple_omp_for_pre_body_ptr (stmt),
18070 diagnose_sb_2, NULL, wi);
18071 walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), diagnose_sb_2, NULL, wi);
18072 wi->info = context;
18073 break;
18074
18075 case GIMPLE_COND:
18076 {
18077 gcond *cond_stmt = as_a <gcond *> (stmt);
18078 tree lab = gimple_cond_true_label (cond_stmt);
18079 if (lab)
18080 {
18081 n = splay_tree_lookup (all_labels,
18082 (splay_tree_key) lab);
18083 diagnose_sb_0 (gsi_p, context,
18084 n ? (gimple *) n->value : NULL);
18085 }
18086 lab = gimple_cond_false_label (cond_stmt);
18087 if (lab)
18088 {
18089 n = splay_tree_lookup (all_labels,
18090 (splay_tree_key) lab);
18091 diagnose_sb_0 (gsi_p, context,
18092 n ? (gimple *) n->value : NULL);
18093 }
18094 }
18095 break;
18096
18097 case GIMPLE_GOTO:
18098 {
18099 tree lab = gimple_goto_dest (stmt);
18100 if (TREE_CODE (lab) != LABEL_DECL)
18101 break;
18102
18103 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
18104 diagnose_sb_0 (gsi_p, context, n ? (gimple *) n->value : NULL);
18105 }
18106 break;
18107
18108 case GIMPLE_SWITCH:
18109 {
18110 gswitch *switch_stmt = as_a <gswitch *> (stmt);
18111 unsigned int i;
18112 for (i = 0; i < gimple_switch_num_labels (switch_stmt); ++i)
18113 {
18114 tree lab = CASE_LABEL (gimple_switch_label (switch_stmt, i));
18115 n = splay_tree_lookup (all_labels, (splay_tree_key) lab);
18116 if (n && diagnose_sb_0 (gsi_p, context, (gimple *) n->value))
18117 break;
18118 }
18119 }
18120 break;
18121
18122 case GIMPLE_RETURN:
18123 diagnose_sb_0 (gsi_p, context, NULL);
18124 break;
18125
18126 default:
18127 break;
18128 }
18129
18130 return NULL_TREE;
18131 }
18132
18133 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
18134 GIMPLE_* codes. */
18135 bool
18136 make_gimple_omp_edges (basic_block bb, struct omp_region **region,
18137 int *region_idx)
18138 {
18139 gimple *last = last_stmt (bb);
18140 enum gimple_code code = gimple_code (last);
18141 struct omp_region *cur_region = *region;
18142 bool fallthru = false;
18143
18144 switch (code)
18145 {
18146 case GIMPLE_OMP_PARALLEL:
18147 case GIMPLE_OMP_TASK:
18148 case GIMPLE_OMP_FOR:
18149 case GIMPLE_OMP_SINGLE:
18150 case GIMPLE_OMP_TEAMS:
18151 case GIMPLE_OMP_MASTER:
18152 case GIMPLE_OMP_TASKGROUP:
18153 case GIMPLE_OMP_CRITICAL:
18154 case GIMPLE_OMP_SECTION:
18155 case GIMPLE_OMP_GRID_BODY:
18156 cur_region = new_omp_region (bb, code, cur_region);
18157 fallthru = true;
18158 break;
18159
18160 case GIMPLE_OMP_ORDERED:
18161 cur_region = new_omp_region (bb, code, cur_region);
18162 fallthru = true;
18163 if (find_omp_clause (gimple_omp_ordered_clauses
18164 (as_a <gomp_ordered *> (last)),
18165 OMP_CLAUSE_DEPEND))
18166 cur_region = cur_region->outer;
18167 break;
18168
18169 case GIMPLE_OMP_TARGET:
18170 cur_region = new_omp_region (bb, code, cur_region);
18171 fallthru = true;
18172 switch (gimple_omp_target_kind (last))
18173 {
18174 case GF_OMP_TARGET_KIND_REGION:
18175 case GF_OMP_TARGET_KIND_DATA:
18176 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
18177 case GF_OMP_TARGET_KIND_OACC_KERNELS:
18178 case GF_OMP_TARGET_KIND_OACC_DATA:
18179 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
18180 break;
18181 case GF_OMP_TARGET_KIND_UPDATE:
18182 case GF_OMP_TARGET_KIND_ENTER_DATA:
18183 case GF_OMP_TARGET_KIND_EXIT_DATA:
18184 case GF_OMP_TARGET_KIND_OACC_UPDATE:
18185 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
18186 case GF_OMP_TARGET_KIND_OACC_DECLARE:
18187 cur_region = cur_region->outer;
18188 break;
18189 default:
18190 gcc_unreachable ();
18191 }
18192 break;
18193
18194 case GIMPLE_OMP_SECTIONS:
18195 cur_region = new_omp_region (bb, code, cur_region);
18196 fallthru = true;
18197 break;
18198
18199 case GIMPLE_OMP_SECTIONS_SWITCH:
18200 fallthru = false;
18201 break;
18202
18203 case GIMPLE_OMP_ATOMIC_LOAD:
18204 case GIMPLE_OMP_ATOMIC_STORE:
18205 fallthru = true;
18206 break;
18207
18208 case GIMPLE_OMP_RETURN:
18209 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
18210 somewhere other than the next block. This will be
18211 created later. */
18212 cur_region->exit = bb;
18213 if (cur_region->type == GIMPLE_OMP_TASK)
18214 /* Add an edge corresponding to not scheduling the task
18215 immediately. */
18216 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
18217 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
18218 cur_region = cur_region->outer;
18219 break;
18220
18221 case GIMPLE_OMP_CONTINUE:
18222 cur_region->cont = bb;
18223 switch (cur_region->type)
18224 {
18225 case GIMPLE_OMP_FOR:
18226 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
18227 succs edges as abnormal to prevent splitting
18228 them. */
18229 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
18230 /* Make the loopback edge. */
18231 make_edge (bb, single_succ (cur_region->entry),
18232 EDGE_ABNORMAL);
18233
18234 /* Create an edge from GIMPLE_OMP_FOR to exit, which
18235 corresponds to the case that the body of the loop
18236 is not executed at all. */
18237 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
18238 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
18239 fallthru = false;
18240 break;
18241
18242 case GIMPLE_OMP_SECTIONS:
18243 /* Wire up the edges into and out of the nested sections. */
18244 {
18245 basic_block switch_bb = single_succ (cur_region->entry);
18246
18247 struct omp_region *i;
18248 for (i = cur_region->inner; i ; i = i->next)
18249 {
18250 gcc_assert (i->type == GIMPLE_OMP_SECTION);
18251 make_edge (switch_bb, i->entry, 0);
18252 make_edge (i->exit, bb, EDGE_FALLTHRU);
18253 }
18254
18255 /* Make the loopback edge to the block with
18256 GIMPLE_OMP_SECTIONS_SWITCH. */
18257 make_edge (bb, switch_bb, 0);
18258
18259 /* Make the edge from the switch to exit. */
18260 make_edge (switch_bb, bb->next_bb, 0);
18261 fallthru = false;
18262 }
18263 break;
18264
18265 case GIMPLE_OMP_TASK:
18266 fallthru = true;
18267 break;
18268
18269 default:
18270 gcc_unreachable ();
18271 }
18272 break;
18273
18274 default:
18275 gcc_unreachable ();
18276 }
18277
18278 if (*region != cur_region)
18279 {
18280 *region = cur_region;
18281 if (cur_region)
18282 *region_idx = cur_region->entry->index;
18283 else
18284 *region_idx = 0;
18285 }
18286
18287 return fallthru;
18288 }
18289
18290 static unsigned int
18291 diagnose_omp_structured_block_errors (void)
18292 {
18293 struct walk_stmt_info wi;
18294 gimple_seq body = gimple_body (current_function_decl);
18295
18296 all_labels = splay_tree_new (splay_tree_compare_pointers, 0, 0);
18297
18298 memset (&wi, 0, sizeof (wi));
18299 walk_gimple_seq (body, diagnose_sb_1, NULL, &wi);
18300
18301 memset (&wi, 0, sizeof (wi));
18302 wi.want_locations = true;
18303 walk_gimple_seq_mod (&body, diagnose_sb_2, NULL, &wi);
18304
18305 gimple_set_body (current_function_decl, body);
18306
18307 splay_tree_delete (all_labels);
18308 all_labels = NULL;
18309
18310 return 0;
18311 }
18312
18313 namespace {
18314
18315 const pass_data pass_data_diagnose_omp_blocks =
18316 {
18317 GIMPLE_PASS, /* type */
18318 "*diagnose_omp_blocks", /* name */
18319 OPTGROUP_NONE, /* optinfo_flags */
18320 TV_NONE, /* tv_id */
18321 PROP_gimple_any, /* properties_required */
18322 0, /* properties_provided */
18323 0, /* properties_destroyed */
18324 0, /* todo_flags_start */
18325 0, /* todo_flags_finish */
18326 };
18327
18328 class pass_diagnose_omp_blocks : public gimple_opt_pass
18329 {
18330 public:
18331 pass_diagnose_omp_blocks (gcc::context *ctxt)
18332 : gimple_opt_pass (pass_data_diagnose_omp_blocks, ctxt)
18333 {}
18334
18335 /* opt_pass methods: */
18336 virtual bool gate (function *)
18337 {
18338 return flag_cilkplus || flag_openacc || flag_openmp;
18339 }
18340 virtual unsigned int execute (function *)
18341 {
18342 return diagnose_omp_structured_block_errors ();
18343 }
18344
18345 }; // class pass_diagnose_omp_blocks
18346
18347 } // anon namespace
18348
18349 gimple_opt_pass *
18350 make_pass_diagnose_omp_blocks (gcc::context *ctxt)
18351 {
18352 return new pass_diagnose_omp_blocks (ctxt);
18353 }
18354 \f
18355 /* SIMD clone supporting code. */
18356
18357 /* Allocate a fresh `simd_clone' and return it. NARGS is the number
18358 of arguments to reserve space for. */
18359
18360 static struct cgraph_simd_clone *
18361 simd_clone_struct_alloc (int nargs)
18362 {
18363 struct cgraph_simd_clone *clone_info;
18364 size_t len = (sizeof (struct cgraph_simd_clone)
18365 + nargs * sizeof (struct cgraph_simd_clone_arg));
18366 clone_info = (struct cgraph_simd_clone *)
18367 ggc_internal_cleared_alloc (len);
18368 return clone_info;
18369 }
18370
18371 /* Make a copy of the `struct cgraph_simd_clone' in FROM to TO. */
18372
18373 static inline void
18374 simd_clone_struct_copy (struct cgraph_simd_clone *to,
18375 struct cgraph_simd_clone *from)
18376 {
18377 memcpy (to, from, (sizeof (struct cgraph_simd_clone)
18378 + ((from->nargs - from->inbranch)
18379 * sizeof (struct cgraph_simd_clone_arg))));
18380 }
18381
18382 /* Return vector of parameter types of function FNDECL. This uses
18383 TYPE_ARG_TYPES if available, otherwise falls back to the types of
18384 the DECL_ARGUMENTS chain. */
18385
18386 vec<tree>
18387 simd_clone_vector_of_formal_parm_types (tree fndecl)
18388 {
18389 if (TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
18390 return ipa_get_vector_of_formal_parm_types (TREE_TYPE (fndecl));
18391 vec<tree> args = ipa_get_vector_of_formal_parms (fndecl);
18392 unsigned int i;
18393 tree arg;
18394 FOR_EACH_VEC_ELT (args, i, arg)
18395 args[i] = TREE_TYPE (args[i]);
18396 return args;
18397 }
18398
18399 /* Given a simd function in NODE, extract the simd specific
18400 information from the OMP clauses passed in CLAUSES, and return
18401 the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED
18402 is set to TRUE if the `inbranch' or `notinbranch' clause is specified,
18403 otherwise it is set to FALSE. */
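/* For instance (illustrative only), given

     #pragma omp declare simd simdlen(8) uniform(a) linear(b:1) notinbranch
     int foo (int *a, int b, int c);

   this records a simdlen of 8, marks A as uniform and B as linear with a
   constant step of 1, sets *INBRANCH_SPECIFIED, and leaves C as an ordinary
   vector argument.  */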
18404
18405 static struct cgraph_simd_clone *
18406 simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
18407 bool *inbranch_specified)
18408 {
18409 vec<tree> args = simd_clone_vector_of_formal_parm_types (node->decl);
18410 tree t;
18411 int n;
18412 *inbranch_specified = false;
18413
18414 n = args.length ();
18415 if (n > 0 && args.last () == void_type_node)
18416 n--;
18417
18418 /* To distinguish from an OpenMP simd clone, Cilk Plus functions to
18419 be cloned have a distinctive artificial attribute in addition to "omp
18420 declare simd". */
18421 bool cilk_clone
18422 = (flag_cilkplus
18423 && lookup_attribute ("cilk simd function",
18424 DECL_ATTRIBUTES (node->decl)));
18425
18426 /* Allocate one more than needed just in case this is an in-branch
18427 clone which will require a mask argument. */
18428 struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
18429 clone_info->nargs = n;
18430 clone_info->cilk_elemental = cilk_clone;
18431
18432 if (!clauses)
18433 {
18434 args.release ();
18435 return clone_info;
18436 }
18437 clauses = TREE_VALUE (clauses);
18438 if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
18439 return clone_info;
18440
18441 for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
18442 {
18443 switch (OMP_CLAUSE_CODE (t))
18444 {
18445 case OMP_CLAUSE_INBRANCH:
18446 clone_info->inbranch = 1;
18447 *inbranch_specified = true;
18448 break;
18449 case OMP_CLAUSE_NOTINBRANCH:
18450 clone_info->inbranch = 0;
18451 *inbranch_specified = true;
18452 break;
18453 case OMP_CLAUSE_SIMDLEN:
18454 clone_info->simdlen
18455 = TREE_INT_CST_LOW (OMP_CLAUSE_SIMDLEN_EXPR (t));
18456 break;
18457 case OMP_CLAUSE_LINEAR:
18458 {
18459 tree decl = OMP_CLAUSE_DECL (t);
18460 tree step = OMP_CLAUSE_LINEAR_STEP (t);
18461 int argno = TREE_INT_CST_LOW (decl);
18462 if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (t))
18463 {
18464 enum cgraph_simd_clone_arg_type arg_type;
18465 if (TREE_CODE (args[argno]) == REFERENCE_TYPE)
18466 switch (OMP_CLAUSE_LINEAR_KIND (t))
18467 {
18468 case OMP_CLAUSE_LINEAR_REF:
18469 arg_type
18470 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP;
18471 break;
18472 case OMP_CLAUSE_LINEAR_UVAL:
18473 arg_type
18474 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP;
18475 break;
18476 case OMP_CLAUSE_LINEAR_VAL:
18477 case OMP_CLAUSE_LINEAR_DEFAULT:
18478 arg_type
18479 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP;
18480 break;
18481 default:
18482 gcc_unreachable ();
18483 }
18484 else
18485 arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP;
18486 clone_info->args[argno].arg_type = arg_type;
18487 clone_info->args[argno].linear_step = tree_to_shwi (step);
18488 gcc_assert (clone_info->args[argno].linear_step >= 0
18489 && clone_info->args[argno].linear_step < n);
18490 }
18491 else
18492 {
18493 if (POINTER_TYPE_P (args[argno]))
18494 step = fold_convert (ssizetype, step);
18495 if (!tree_fits_shwi_p (step))
18496 {
18497 warning_at (OMP_CLAUSE_LOCATION (t), 0,
18498 "ignoring large linear step");
18499 args.release ();
18500 return NULL;
18501 }
18502 else if (integer_zerop (step))
18503 {
18504 warning_at (OMP_CLAUSE_LOCATION (t), 0,
18505 "ignoring zero linear step");
18506 args.release ();
18507 return NULL;
18508 }
18509 else
18510 {
18511 enum cgraph_simd_clone_arg_type arg_type;
18512 if (TREE_CODE (args[argno]) == REFERENCE_TYPE)
18513 switch (OMP_CLAUSE_LINEAR_KIND (t))
18514 {
18515 case OMP_CLAUSE_LINEAR_REF:
18516 arg_type
18517 = SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP;
18518 break;
18519 case OMP_CLAUSE_LINEAR_UVAL:
18520 arg_type
18521 = SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP;
18522 break;
18523 case OMP_CLAUSE_LINEAR_VAL:
18524 case OMP_CLAUSE_LINEAR_DEFAULT:
18525 arg_type
18526 = SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP;
18527 break;
18528 default:
18529 gcc_unreachable ();
18530 }
18531 else
18532 arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP;
18533 clone_info->args[argno].arg_type = arg_type;
18534 clone_info->args[argno].linear_step = tree_to_shwi (step);
18535 }
18536 }
18537 break;
18538 }
18539 case OMP_CLAUSE_UNIFORM:
18540 {
18541 tree decl = OMP_CLAUSE_DECL (t);
18542 int argno = tree_to_uhwi (decl);
18543 clone_info->args[argno].arg_type
18544 = SIMD_CLONE_ARG_TYPE_UNIFORM;
18545 break;
18546 }
18547 case OMP_CLAUSE_ALIGNED:
18548 {
18549 tree decl = OMP_CLAUSE_DECL (t);
18550 int argno = tree_to_uhwi (decl);
18551 clone_info->args[argno].alignment
18552 = TREE_INT_CST_LOW (OMP_CLAUSE_ALIGNED_ALIGNMENT (t));
18553 break;
18554 }
18555 default:
18556 break;
18557 }
18558 }
18559 args.release ();
18560 return clone_info;
18561 }
18562
18563 /* Given a SIMD clone in NODE, calculate the characteristic data
18564 type and return the corresponding type. The characteristic data
18565 type is computed as described in the Intel Vector ABI. */
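/* For example (illustrative): for "float foo (int *p, int n)" with both P
   and N uniform, rule a) below applies and the characteristic type is
   float; for "void bar (double *out, double x)" with OUT uniform, rule b)
   picks the type of X, i.e. double.  */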
18566
18567 static tree
18568 simd_clone_compute_base_data_type (struct cgraph_node *node,
18569 struct cgraph_simd_clone *clone_info)
18570 {
18571 tree type = integer_type_node;
18572 tree fndecl = node->decl;
18573
18574 /* a) For non-void function, the characteristic data type is the
18575 return type. */
18576 if (TREE_CODE (TREE_TYPE (TREE_TYPE (fndecl))) != VOID_TYPE)
18577 type = TREE_TYPE (TREE_TYPE (fndecl));
18578
18579 /* b) If the function has any non-uniform, non-linear parameters,
18580 then the characteristic data type is the type of the first
18581 such parameter. */
18582 else
18583 {
18584 vec<tree> map = simd_clone_vector_of_formal_parm_types (fndecl);
18585 for (unsigned int i = 0; i < clone_info->nargs; ++i)
18586 if (clone_info->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
18587 {
18588 type = map[i];
18589 break;
18590 }
18591 map.release ();
18592 }
18593
18594 /* c) If the characteristic data type determined by a) or b) above
18595 is a struct, union, or class type which is passed by value (except
18596 for the type that maps to the built-in complex data type), the
18597 characteristic data type is int. */
18598 if (RECORD_OR_UNION_TYPE_P (type)
18599 && !aggregate_value_p (type, NULL)
18600 && TREE_CODE (type) != COMPLEX_TYPE)
18601 return integer_type_node;
18602
18603 /* d) If none of the above three classes is applicable, the
18604 characteristic data type is int. */
18605
18606 return type;
18607
18608 /* e) For Intel Xeon Phi native and offload compilation, if the
18609 resulting characteristic data type is 8-bit or 16-bit integer
18610 data type, the characteristic data type is int. */
18611 /* Well, we don't handle Xeon Phi yet. */
18612 }
18613
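/* Compute the assembler name for the SIMD clone of NODE described by
   CLONE_INFO, in the "_ZGV<isa><mask><simdlen><arg-kinds>_<original-name>"
   style built below; e.g. (illustrative, with an x86-style ISA letter)
   "_ZGVbN4vu_foo" for a notinbranch clone of foo with simdlen 4, a vector
   first argument and a uniform second argument.  Return NULL_TREE if a
   clone with the same mangled name already exists.  */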
18614 static tree
18615 simd_clone_mangle (struct cgraph_node *node,
18616 struct cgraph_simd_clone *clone_info)
18617 {
18618 char vecsize_mangle = clone_info->vecsize_mangle;
18619 char mask = clone_info->inbranch ? 'M' : 'N';
18620 unsigned int simdlen = clone_info->simdlen;
18621 unsigned int n;
18622 pretty_printer pp;
18623
18624 gcc_assert (vecsize_mangle && simdlen);
18625
18626 pp_string (&pp, "_ZGV");
18627 pp_character (&pp, vecsize_mangle);
18628 pp_character (&pp, mask);
18629 pp_decimal_int (&pp, simdlen);
18630
18631 for (n = 0; n < clone_info->nargs; ++n)
18632 {
18633 struct cgraph_simd_clone_arg arg = clone_info->args[n];
18634
18635 switch (arg.arg_type)
18636 {
18637 case SIMD_CLONE_ARG_TYPE_UNIFORM:
18638 pp_character (&pp, 'u');
18639 break;
18640 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
18641 pp_character (&pp, 'l');
18642 goto mangle_linear;
18643 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
18644 pp_character (&pp, 'R');
18645 goto mangle_linear;
18646 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
18647 pp_character (&pp, 'L');
18648 goto mangle_linear;
18649 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
18650 pp_character (&pp, 'U');
18651 goto mangle_linear;
18652 mangle_linear:
18653 gcc_assert (arg.linear_step != 0);
18654 if (arg.linear_step > 1)
18655 pp_unsigned_wide_integer (&pp, arg.linear_step);
18656 else if (arg.linear_step < 0)
18657 {
18658 pp_character (&pp, 'n');
18659 pp_unsigned_wide_integer (&pp, (-(unsigned HOST_WIDE_INT)
18660 arg.linear_step));
18661 }
18662 break;
18663 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
18664 pp_string (&pp, "ls");
18665 pp_unsigned_wide_integer (&pp, arg.linear_step);
18666 break;
18667 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
18668 pp_string (&pp, "Rs");
18669 pp_unsigned_wide_integer (&pp, arg.linear_step);
18670 break;
18671 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
18672 pp_string (&pp, "Ls");
18673 pp_unsigned_wide_integer (&pp, arg.linear_step);
18674 break;
18675 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
18676 pp_string (&pp, "Us");
18677 pp_unsigned_wide_integer (&pp, arg.linear_step);
18678 break;
18679 default:
18680 pp_character (&pp, 'v');
18681 }
18682 if (arg.alignment)
18683 {
18684 pp_character (&pp, 'a');
18685 pp_decimal_int (&pp, arg.alignment);
18686 }
18687 }
18688
18689 pp_underscore (&pp);
18690 const char *str = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (node->decl));
18691 if (*str == '*')
18692 ++str;
18693 pp_string (&pp, str);
18694 str = pp_formatted_text (&pp);
18695
18696 /* If there already is a SIMD clone with the same mangled name, don't
18697 add another one. This can happen e.g. for
18698 #pragma omp declare simd
18699 #pragma omp declare simd simdlen(8)
18700 int foo (int, int);
18701 if the simdlen is assumed to be 8 for the first one, etc. */
18702 for (struct cgraph_node *clone = node->simd_clones; clone;
18703 clone = clone->simdclone->next_clone)
18704 if (strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (clone->decl)),
18705 str) == 0)
18706 return NULL_TREE;
18707
18708 return get_identifier (str);
18709 }
18710
18711 /* Create a simd clone of OLD_NODE and return it. */
18712
18713 static struct cgraph_node *
18714 simd_clone_create (struct cgraph_node *old_node)
18715 {
18716 struct cgraph_node *new_node;
18717 if (old_node->definition)
18718 {
18719 if (!old_node->has_gimple_body_p ())
18720 return NULL;
18721 old_node->get_body ();
18722 new_node = old_node->create_version_clone_with_body (vNULL, NULL, NULL,
18723 false, NULL, NULL,
18724 "simdclone");
18725 }
18726 else
18727 {
18728 tree old_decl = old_node->decl;
18729 tree new_decl = copy_node (old_node->decl);
18730 DECL_NAME (new_decl) = clone_function_name (old_decl, "simdclone");
18731 SET_DECL_ASSEMBLER_NAME (new_decl, DECL_NAME (new_decl));
18732 SET_DECL_RTL (new_decl, NULL);
18733 DECL_STATIC_CONSTRUCTOR (new_decl) = 0;
18734 DECL_STATIC_DESTRUCTOR (new_decl) = 0;
18735 new_node = old_node->create_version_clone (new_decl, vNULL, NULL);
18736 if (old_node->in_other_partition)
18737 new_node->in_other_partition = 1;
18738 }
18739 if (new_node == NULL)
18740 return new_node;
18741
18742 TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl);
18743
18744 /* The function cgraph_function_versioning () will force the new
18745 symbol local. Undo this, and inherit external visibility from
18746 the old node. */
18747 new_node->local.local = old_node->local.local;
18748 new_node->externally_visible = old_node->externally_visible;
18749
18750 return new_node;
18751 }
18752
18753 /* Adjust the return type of the given function to its appropriate
18754 vector counterpart. Returns a simd array to be used throughout the
18755 function as a return value. */
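/* For instance (an illustrative sketch): with a simdlen of 8, an int return
   value and a 256-bit vecsize_int the return type becomes a single vector
   of 8 ints, whereas with a 128-bit vecsize_int it becomes an array of two
   vectors of 4 ints each.  */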
18756
18757 static tree
18758 simd_clone_adjust_return_type (struct cgraph_node *node)
18759 {
18760 tree fndecl = node->decl;
18761 tree orig_rettype = TREE_TYPE (TREE_TYPE (fndecl));
18762 unsigned int veclen;
18763 tree t;
18764
18765 /* Adjust the function return type. */
18766 if (orig_rettype == void_type_node)
18767 return NULL_TREE;
18768 TREE_TYPE (fndecl) = build_distinct_type_copy (TREE_TYPE (fndecl));
18769 t = TREE_TYPE (TREE_TYPE (fndecl));
18770 if (INTEGRAL_TYPE_P (t) || POINTER_TYPE_P (t))
18771 veclen = node->simdclone->vecsize_int;
18772 else
18773 veclen = node->simdclone->vecsize_float;
18774 veclen /= GET_MODE_BITSIZE (TYPE_MODE (t));
18775 if (veclen > node->simdclone->simdlen)
18776 veclen = node->simdclone->simdlen;
18777 if (POINTER_TYPE_P (t))
18778 t = pointer_sized_int_node;
18779 if (veclen == node->simdclone->simdlen)
18780 t = build_vector_type (t, node->simdclone->simdlen);
18781 else
18782 {
18783 t = build_vector_type (t, veclen);
18784 t = build_array_type_nelts (t, node->simdclone->simdlen / veclen);
18785 }
18786 TREE_TYPE (TREE_TYPE (fndecl)) = t;
18787 if (!node->definition)
18788 return NULL_TREE;
18789
18790 t = DECL_RESULT (fndecl);
18791 /* Adjust the DECL_RESULT. */
18792 gcc_assert (TREE_TYPE (t) != void_type_node);
18793 TREE_TYPE (t) = TREE_TYPE (TREE_TYPE (fndecl));
18794 relayout_decl (t);
18795
18796 tree atype = build_array_type_nelts (orig_rettype,
18797 node->simdclone->simdlen);
18798 if (veclen != node->simdclone->simdlen)
18799 return build1 (VIEW_CONVERT_EXPR, atype, t);
18800
18801 /* Set up a SIMD array to use as the return value. */
18802 tree retval = create_tmp_var_raw (atype, "retval");
18803 gimple_add_tmp_var (retval);
18804 return retval;
18805 }
18806
18807 /* Each vector argument has a corresponding array to be used locally
18808 as part of the eventual loop. Create such temporary array and
18809 return it.
18810
18811 PREFIX is the prefix to be used for the temporary.
18812
18813 TYPE is the inner element type.
18814
18815 SIMDLEN is the number of elements. */
18816
18817 static tree
18818 create_tmp_simd_array (const char *prefix, tree type, int simdlen)
18819 {
18820 tree atype = build_array_type_nelts (type, simdlen);
18821 tree avar = create_tmp_var_raw (atype, prefix);
18822 gimple_add_tmp_var (avar);
18823 return avar;
18824 }
18825
18826 /* Modify the function argument types to their corresponding vector
18827 counterparts if appropriate. Also, create one array for each simd
18828 argument to be used locally when using the function arguments as
18829 part of the loop.
18830
18831 NODE is the function whose arguments are to be adjusted.
18832
18833 Returns an adjustment vector describing how the argument types
18834 have been adjusted. */
18835
18836 static ipa_parm_adjustment_vec
18837 simd_clone_adjust_argument_types (struct cgraph_node *node)
18838 {
18839 vec<tree> args;
18840 ipa_parm_adjustment_vec adjustments;
18841
18842 if (node->definition)
18843 args = ipa_get_vector_of_formal_parms (node->decl);
18844 else
18845 args = simd_clone_vector_of_formal_parm_types (node->decl);
18846 adjustments.create (args.length ());
18847 unsigned i, j, veclen;
18848 struct ipa_parm_adjustment adj;
18849 for (i = 0; i < node->simdclone->nargs; ++i)
18850 {
18851 memset (&adj, 0, sizeof (adj));
18852 tree parm = args[i];
18853 tree parm_type = node->definition ? TREE_TYPE (parm) : parm;
18854 adj.base_index = i;
18855 adj.base = parm;
18856
18857 node->simdclone->args[i].orig_arg = node->definition ? parm : NULL_TREE;
18858 node->simdclone->args[i].orig_type = parm_type;
18859
18860 switch (node->simdclone->args[i].arg_type)
18861 {
18862 default:
18863 /* No adjustment necessary for scalar arguments. */
18864 adj.op = IPA_PARM_OP_COPY;
18865 break;
18866 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
18867 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
18868 if (node->definition)
18869 node->simdclone->args[i].simd_array
18870 = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
18871 TREE_TYPE (parm_type),
18872 node->simdclone->simdlen);
18873 adj.op = IPA_PARM_OP_COPY;
18874 break;
18875 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
18876 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
18877 case SIMD_CLONE_ARG_TYPE_VECTOR:
18878 if (INTEGRAL_TYPE_P (parm_type) || POINTER_TYPE_P (parm_type))
18879 veclen = node->simdclone->vecsize_int;
18880 else
18881 veclen = node->simdclone->vecsize_float;
18882 veclen /= GET_MODE_BITSIZE (TYPE_MODE (parm_type));
18883 if (veclen > node->simdclone->simdlen)
18884 veclen = node->simdclone->simdlen;
18885 adj.arg_prefix = "simd";
18886 if (POINTER_TYPE_P (parm_type))
18887 adj.type = build_vector_type (pointer_sized_int_node, veclen);
18888 else
18889 adj.type = build_vector_type (parm_type, veclen);
18890 node->simdclone->args[i].vector_type = adj.type;
18891 for (j = veclen; j < node->simdclone->simdlen; j += veclen)
18892 {
18893 adjustments.safe_push (adj);
18894 if (j == veclen)
18895 {
18896 memset (&adj, 0, sizeof (adj));
18897 adj.op = IPA_PARM_OP_NEW;
18898 adj.arg_prefix = "simd";
18899 adj.base_index = i;
18900 adj.type = node->simdclone->args[i].vector_type;
18901 }
18902 }
18903
18904 if (node->definition)
18905 node->simdclone->args[i].simd_array
18906 = create_tmp_simd_array (IDENTIFIER_POINTER (DECL_NAME (parm)),
18907 parm_type, node->simdclone->simdlen);
18908 }
18909 adjustments.safe_push (adj);
18910 }
18911
18912 if (node->simdclone->inbranch)
18913 {
18914 tree base_type
18915 = simd_clone_compute_base_data_type (node->simdclone->origin,
18916 node->simdclone);
18917
18918 memset (&adj, 0, sizeof (adj));
18919 adj.op = IPA_PARM_OP_NEW;
18920 adj.arg_prefix = "mask";
18921
18922 adj.base_index = i;
18923 if (INTEGRAL_TYPE_P (base_type) || POINTER_TYPE_P (base_type))
18924 veclen = node->simdclone->vecsize_int;
18925 else
18926 veclen = node->simdclone->vecsize_float;
18927 veclen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
18928 if (veclen > node->simdclone->simdlen)
18929 veclen = node->simdclone->simdlen;
18930 if (POINTER_TYPE_P (base_type))
18931 adj.type = build_vector_type (pointer_sized_int_node, veclen);
18932 else
18933 adj.type = build_vector_type (base_type, veclen);
18934 adjustments.safe_push (adj);
18935
18936 for (j = veclen; j < node->simdclone->simdlen; j += veclen)
18937 adjustments.safe_push (adj);
18938
18939 /* We have previously allocated one extra entry for the mask. Use
18940 it and fill it. */
18941 struct cgraph_simd_clone *sc = node->simdclone;
18942 sc->nargs++;
18943 if (node->definition)
18944 {
18945 sc->args[i].orig_arg
18946 = build_decl (UNKNOWN_LOCATION, PARM_DECL, NULL, base_type);
18947 sc->args[i].simd_array
18948 = create_tmp_simd_array ("mask", base_type, sc->simdlen);
18949 }
18950 sc->args[i].orig_type = base_type;
18951 sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
18952 }
18953
18954 if (node->definition)
18955 ipa_modify_formal_parameters (node->decl, adjustments);
18956 else
18957 {
18958 tree new_arg_types = NULL_TREE, new_reversed;
18959 bool last_parm_void = false;
18960 if (args.length () > 0 && args.last () == void_type_node)
18961 last_parm_void = true;
18962
18963 gcc_assert (TYPE_ARG_TYPES (TREE_TYPE (node->decl)));
18964 j = adjustments.length ();
18965 for (i = 0; i < j; i++)
18966 {
18967 struct ipa_parm_adjustment *adj = &adjustments[i];
18968 tree ptype;
18969 if (adj->op == IPA_PARM_OP_COPY)
18970 ptype = args[adj->base_index];
18971 else
18972 ptype = adj->type;
18973 new_arg_types = tree_cons (NULL_TREE, ptype, new_arg_types);
18974 }
18975 new_reversed = nreverse (new_arg_types);
18976 if (last_parm_void)
18977 {
18978 if (new_reversed)
18979 TREE_CHAIN (new_arg_types) = void_list_node;
18980 else
18981 new_reversed = void_list_node;
18982 }
18983
18984 tree new_type = build_distinct_type_copy (TREE_TYPE (node->decl));
18985 TYPE_ARG_TYPES (new_type) = new_reversed;
18986 TREE_TYPE (node->decl) = new_type;
18987
18988 adjustments.release ();
18989 }
18990 args.release ();
18991 return adjustments;
18992 }
18993
18994 /* Initialize and copy the function arguments in NODE to their
18995 corresponding local simd arrays. Returns a fresh gimple_seq with
18996 the instruction sequence generated. */
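/* For example (illustrative), a vector argument that arrives split into two
   4-element chunks for a simdlen of 8 is stored into its simd array at byte
   offsets 0 and 4 * elemsize by the code below.  */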
18997
18998 static gimple_seq
18999 simd_clone_init_simd_arrays (struct cgraph_node *node,
19000 ipa_parm_adjustment_vec adjustments)
19001 {
19002 gimple_seq seq = NULL;
19003 unsigned i = 0, j = 0, k;
19004
19005 for (tree arg = DECL_ARGUMENTS (node->decl);
19006 arg;
19007 arg = DECL_CHAIN (arg), i++, j++)
19008 {
19009 if (adjustments[j].op == IPA_PARM_OP_COPY
19010 || POINTER_TYPE_P (TREE_TYPE (arg)))
19011 continue;
19012
19013 node->simdclone->args[i].vector_arg = arg;
19014
19015 tree array = node->simdclone->args[i].simd_array;
19016 if (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg)) == node->simdclone->simdlen)
19017 {
19018 tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
19019 tree ptr = build_fold_addr_expr (array);
19020 tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
19021 build_int_cst (ptype, 0));
19022 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
19023 gimplify_and_add (t, &seq);
19024 }
19025 else
19026 {
19027 unsigned int simdlen = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg));
19028 tree ptype = build_pointer_type (TREE_TYPE (TREE_TYPE (array)));
19029 for (k = 0; k < node->simdclone->simdlen; k += simdlen)
19030 {
19031 tree ptr = build_fold_addr_expr (array);
19032 int elemsize;
19033 if (k)
19034 {
19035 arg = DECL_CHAIN (arg);
19036 j++;
19037 }
19038 elemsize
19039 = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (TREE_TYPE (arg))));
19040 tree t = build2 (MEM_REF, TREE_TYPE (arg), ptr,
19041 build_int_cst (ptype, k * elemsize));
19042 t = build2 (MODIFY_EXPR, TREE_TYPE (t), t, arg);
19043 gimplify_and_add (t, &seq);
19044 }
19045 }
19046 }
19047 return seq;
19048 }
19049
19050 /* Callback info for ipa_simd_modify_stmt_ops below. */
19051
19052 struct modify_stmt_info {
19053 ipa_parm_adjustment_vec adjustments;
19054 gimple *stmt;
19055 /* True if the parent statement was modified by
19056 ipa_simd_modify_stmt_ops. */
19057 bool modified;
19058 };
19059
19060 /* Callback for walk_gimple_op.
19061
19062 Adjust operands from a given statement as specified in the
19063 adjustments vector in the callback data. */
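/* For instance (illustrative), once ipa_simd_modify_function_body has set up
   the adjustments, a use of a vector parameter X in the clone body is
   rewritten into a read of X's simd array element for the current lane
   (an ARRAY_REF of that array at index ITER).  */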
19064
19065 static tree
19066 ipa_simd_modify_stmt_ops (tree *tp, int *walk_subtrees, void *data)
19067 {
19068 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
19069 struct modify_stmt_info *info = (struct modify_stmt_info *) wi->info;
19070 tree *orig_tp = tp;
19071 if (TREE_CODE (*tp) == ADDR_EXPR)
19072 tp = &TREE_OPERAND (*tp, 0);
19073 struct ipa_parm_adjustment *cand = NULL;
19074 if (TREE_CODE (*tp) == PARM_DECL)
19075 cand = ipa_get_adjustment_candidate (&tp, NULL, info->adjustments, true);
19076 else
19077 {
19078 if (TYPE_P (*tp))
19079 *walk_subtrees = 0;
19080 }
19081
19082 tree repl = NULL_TREE;
19083 if (cand)
19084 repl = unshare_expr (cand->new_decl);
19085 else
19086 {
19087 if (tp != orig_tp)
19088 {
19089 *walk_subtrees = 0;
19090 bool modified = info->modified;
19091 info->modified = false;
19092 walk_tree (tp, ipa_simd_modify_stmt_ops, wi, wi->pset);
19093 if (!info->modified)
19094 {
19095 info->modified = modified;
19096 return NULL_TREE;
19097 }
19098 info->modified = modified;
19099 repl = *tp;
19100 }
19101 else
19102 return NULL_TREE;
19103 }
19104
19105 if (tp != orig_tp)
19106 {
19107 repl = build_fold_addr_expr (repl);
19108 gimple *stmt;
19109 if (is_gimple_debug (info->stmt))
19110 {
19111 tree vexpr = make_node (DEBUG_EXPR_DECL);
19112 stmt = gimple_build_debug_source_bind (vexpr, repl, NULL);
19113 DECL_ARTIFICIAL (vexpr) = 1;
19114 TREE_TYPE (vexpr) = TREE_TYPE (repl);
19115 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (repl));
19116 repl = vexpr;
19117 }
19118 else
19119 {
19120 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
19121 repl = gimple_assign_lhs (stmt);
19122 }
19123 gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
19124 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
19125 *orig_tp = repl;
19126 }
19127 else if (!useless_type_conversion_p (TREE_TYPE (*tp), TREE_TYPE (repl)))
19128 {
19129 tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (*tp), repl);
19130 *tp = vce;
19131 }
19132 else
19133 *tp = repl;
19134
19135 info->modified = true;
19136 return NULL_TREE;
19137 }
19138
19139 /* Traverse the function body and perform all modifications as
19140 described in ADJUSTMENTS. At function return, ADJUSTMENTS will be
19141 modified such that the replacement/reduction value will now be an
19142 offset into the corresponding simd_array.
19143
19144 This function will replace all function argument uses with their
19145 corresponding simd array elements, and adjust the return values
19146 accordingly. */
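/* For instance, a use of a vector argument X in the clone body becomes
   an ARRAY_REF of the simd array backing X, indexed by ITER, i.e.
   roughly X_simd_array[iter] (the name is illustrative; the array decl
   used is the one recorded in the simdclone argument info).  */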
19147
19148 static void
19149 ipa_simd_modify_function_body (struct cgraph_node *node,
19150 ipa_parm_adjustment_vec adjustments,
19151 tree retval_array, tree iter)
19152 {
19153 basic_block bb;
19154 unsigned int i, j, l;
19155
19156 /* Re-use the adjustments array, but this time use it to replace
19157 every function argument use to an offset into the corresponding
19158 simd_array. */
19159 for (i = 0, j = 0; i < node->simdclone->nargs; ++i, ++j)
19160 {
19161 if (!node->simdclone->args[i].vector_arg)
19162 continue;
19163
19164 tree basetype = TREE_TYPE (node->simdclone->args[i].orig_arg);
19165 tree vectype = TREE_TYPE (node->simdclone->args[i].vector_arg);
19166 adjustments[j].new_decl
19167 = build4 (ARRAY_REF,
19168 basetype,
19169 node->simdclone->args[i].simd_array,
19170 iter,
19171 NULL_TREE, NULL_TREE);
19172 if (adjustments[j].op == IPA_PARM_OP_NONE
19173 && TYPE_VECTOR_SUBPARTS (vectype) < node->simdclone->simdlen)
19174 j += node->simdclone->simdlen / TYPE_VECTOR_SUBPARTS (vectype) - 1;
19175 }
19176
19177 l = adjustments.length ();
19178 for (i = 1; i < num_ssa_names; i++)
19179 {
19180 tree name = ssa_name (i);
19181 if (name
19182 && SSA_NAME_VAR (name)
19183 && TREE_CODE (SSA_NAME_VAR (name)) == PARM_DECL)
19184 {
19185 for (j = 0; j < l; j++)
19186 if (SSA_NAME_VAR (name) == adjustments[j].base
19187 && adjustments[j].new_decl)
19188 {
19189 tree base_var;
19190 if (adjustments[j].new_ssa_base == NULL_TREE)
19191 {
19192 base_var
19193 = copy_var_decl (adjustments[j].base,
19194 DECL_NAME (adjustments[j].base),
19195 TREE_TYPE (adjustments[j].base));
19196 adjustments[j].new_ssa_base = base_var;
19197 }
19198 else
19199 base_var = adjustments[j].new_ssa_base;
19200 if (SSA_NAME_IS_DEFAULT_DEF (name))
19201 {
19202 bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
19203 gimple_stmt_iterator gsi = gsi_after_labels (bb);
19204 tree new_decl = unshare_expr (adjustments[j].new_decl);
19205 set_ssa_default_def (cfun, adjustments[j].base, NULL_TREE);
19206 SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var);
19207 SSA_NAME_IS_DEFAULT_DEF (name) = 0;
19208 gimple *stmt = gimple_build_assign (name, new_decl);
19209 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
19210 }
19211 else
19212 SET_SSA_NAME_VAR_OR_IDENTIFIER (name, base_var);
19213 }
19214 }
19215 }
19216
19217 struct modify_stmt_info info;
19218 info.adjustments = adjustments;
19219
19220 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
19221 {
19222 gimple_stmt_iterator gsi;
19223
19224 gsi = gsi_start_bb (bb);
19225 while (!gsi_end_p (gsi))
19226 {
19227 gimple *stmt = gsi_stmt (gsi);
19228 info.stmt = stmt;
19229 struct walk_stmt_info wi;
19230
19231 memset (&wi, 0, sizeof (wi));
19232 info.modified = false;
19233 wi.info = &info;
19234 walk_gimple_op (stmt, ipa_simd_modify_stmt_ops, &wi);
19235
19236 if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
19237 {
19238 tree retval = gimple_return_retval (return_stmt);
19239 if (!retval)
19240 {
19241 gsi_remove (&gsi, true);
19242 continue;
19243 }
19244
19245 /* Replace `return foo' with `retval_array[iter] = foo'. */
19246 tree ref = build4 (ARRAY_REF, TREE_TYPE (retval),
19247 retval_array, iter, NULL, NULL);
19248 stmt = gimple_build_assign (ref, retval);
19249 gsi_replace (&gsi, stmt, true);
19250 info.modified = true;
19251 }
19252
19253 if (info.modified)
19254 {
19255 update_stmt (stmt);
19256 if (maybe_clean_eh_stmt (stmt))
19257 gimple_purge_dead_eh_edges (gimple_bb (stmt));
19258 }
19259 gsi_next (&gsi);
19260 }
19261 }
19262 }
19263
19264 /* Helper function of simd_clone_adjust: return the linear step addend
19265 of the Ith argument. */
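/* For the *_CONSTANT_STEP kinds the addend is simply the literal
   linear_step value.  For the *_VARIABLE_STEP kinds, linear_step
   instead holds the index of the argument that carries the step at run
   time; that argument is loaded here and, for pointer-typed linear
   arguments, scaled by the size of the pointed-to type.  */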
19266
19267 static tree
19268 simd_clone_linear_addend (struct cgraph_node *node, unsigned int i,
19269 tree addtype, basic_block entry_bb)
19270 {
19271 tree ptype = NULL_TREE;
19272 switch (node->simdclone->args[i].arg_type)
19273 {
19274 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
19275 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
19276 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
19277 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
19278 return build_int_cst (addtype, node->simdclone->args[i].linear_step);
19279 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
19280 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
19281 ptype = TREE_TYPE (node->simdclone->args[i].orig_arg);
19282 break;
19283 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
19284 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
19285 ptype = TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg));
19286 break;
19287 default:
19288 gcc_unreachable ();
19289 }
19290
19291 unsigned int idx = node->simdclone->args[i].linear_step;
19292 tree arg = node->simdclone->args[idx].orig_arg;
19293 gcc_assert (is_gimple_reg_type (TREE_TYPE (arg)));
19294 gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
19295 gimple *g;
19296 tree ret;
19297 if (is_gimple_reg (arg))
19298 ret = get_or_create_ssa_default_def (cfun, arg);
19299 else
19300 {
19301 g = gimple_build_assign (make_ssa_name (TREE_TYPE (arg)), arg);
19302 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19303 ret = gimple_assign_lhs (g);
19304 }
19305 if (TREE_CODE (TREE_TYPE (arg)) == REFERENCE_TYPE)
19306 {
19307 g = gimple_build_assign (make_ssa_name (TREE_TYPE (TREE_TYPE (arg))),
19308 build_simple_mem_ref (ret));
19309 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19310 ret = gimple_assign_lhs (g);
19311 }
19312 if (!useless_type_conversion_p (addtype, TREE_TYPE (ret)))
19313 {
19314 g = gimple_build_assign (make_ssa_name (addtype), NOP_EXPR, ret);
19315 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19316 ret = gimple_assign_lhs (g);
19317 }
19318 if (POINTER_TYPE_P (ptype))
19319 {
19320 tree size = TYPE_SIZE_UNIT (TREE_TYPE (ptype));
19321 if (size && TREE_CODE (size) == INTEGER_CST)
19322 {
19323 g = gimple_build_assign (make_ssa_name (addtype), MULT_EXPR,
19324 ret, fold_convert (addtype, size));
19325 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19326 ret = gimple_assign_lhs (g);
19327 }
19328 }
19329 return ret;
19330 }
19331
19332 /* Adjust the argument types in NODE to their appropriate vector
19333 counterparts. */
19334
19335 static void
19336 simd_clone_adjust (struct cgraph_node *node)
19337 {
19338 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
19339
19340 targetm.simd_clone.adjust (node);
19341
19342 tree retval = simd_clone_adjust_return_type (node);
19343 ipa_parm_adjustment_vec adjustments
19344 = simd_clone_adjust_argument_types (node);
19345
19346 push_gimplify_context ();
19347
19348 gimple_seq seq = simd_clone_init_simd_arrays (node, adjustments);
19349
19350 /* Adjust all uses of vector arguments accordingly. Adjust all
19351 return values accordingly. */
19352 tree iter = create_tmp_var (unsigned_type_node, "iter");
19353 tree iter1 = make_ssa_name (iter);
19354 tree iter2 = make_ssa_name (iter);
19355 ipa_simd_modify_function_body (node, adjustments, retval, iter1);
19356
19357 /* Initialize the iteration variable. */
19358 basic_block entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
19359 basic_block body_bb = split_block_after_labels (entry_bb)->dest;
19360 gimple_stmt_iterator gsi = gsi_after_labels (entry_bb);
19361 /* Insert the SIMD array and iv initialization at function
19362 entry. */
19363 gsi_insert_seq_before (&gsi, seq, GSI_NEW_STMT);
19364
19365 pop_gimplify_context (NULL);
19366
19367 /* Create a new BB right before the original exit BB, to hold the
19368 iteration increment and the condition/branch. */
19369 basic_block orig_exit = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), 0)->src;
19370 basic_block incr_bb = create_empty_bb (orig_exit);
19371 add_bb_to_loop (incr_bb, body_bb->loop_father);
19372 /* The succ edge from orig_exit to EXIT_BLOCK_PTR_FOR_FN (cfun) had no
19373 flags set. Mark it as a FALLTHRU_EDGE now. */
19374 gcc_assert (EDGE_COUNT (orig_exit->succs) == 1);
19375 EDGE_SUCC (orig_exit, 0)->flags |= EDGE_FALLTHRU;
19376 for (unsigned i = 0;
19377 i < EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); ++i)
19378 {
19379 edge e = EDGE_PRED (EXIT_BLOCK_PTR_FOR_FN (cfun), i);
19380 redirect_edge_succ (e, incr_bb);
19381 }
19382 edge e = make_edge (incr_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
19383 e->probability = REG_BR_PROB_BASE;
19384 gsi = gsi_last_bb (incr_bb);
19385 gimple *g = gimple_build_assign (iter2, PLUS_EXPR, iter1,
19386 build_int_cst (unsigned_type_node, 1));
19387 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19388
19389 /* Mostly annotate the loop for the vectorizer (the rest is done below). */
19390 struct loop *loop = alloc_loop ();
19391 cfun->has_force_vectorize_loops = true;
19392 loop->safelen = node->simdclone->simdlen;
19393 loop->force_vectorize = true;
19394 loop->header = body_bb;
19395
19396 /* Branch around the body if the mask applies. */
19397 if (node->simdclone->inbranch)
19398 {
19399 gimple_stmt_iterator gsi = gsi_last_bb (loop->header);
19400 tree mask_array
19401 = node->simdclone->args[node->simdclone->nargs - 1].simd_array;
19402 tree mask = make_ssa_name (TREE_TYPE (TREE_TYPE (mask_array)));
19403 tree aref = build4 (ARRAY_REF,
19404 TREE_TYPE (TREE_TYPE (mask_array)),
19405 mask_array, iter1,
19406 NULL, NULL);
19407 g = gimple_build_assign (mask, aref);
19408 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19409 int bitsize = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (aref)));
19410 if (!INTEGRAL_TYPE_P (TREE_TYPE (aref)))
19411 {
19412 aref = build1 (VIEW_CONVERT_EXPR,
19413 build_nonstandard_integer_type (bitsize, 0), mask);
19414 mask = make_ssa_name (TREE_TYPE (aref));
19415 g = gimple_build_assign (mask, aref);
19416 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19417 }
19418
19419 g = gimple_build_cond (EQ_EXPR, mask, build_zero_cst (TREE_TYPE (mask)),
19420 NULL, NULL);
19421 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19422 make_edge (loop->header, incr_bb, EDGE_TRUE_VALUE);
19423 FALLTHRU_EDGE (loop->header)->flags = EDGE_FALSE_VALUE;
19424 }
19425
19426 /* Generate the condition. */
19427 g = gimple_build_cond (LT_EXPR,
19428 iter2,
19429 build_int_cst (unsigned_type_node,
19430 node->simdclone->simdlen),
19431 NULL, NULL);
19432 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19433 e = split_block (incr_bb, gsi_stmt (gsi));
19434 basic_block latch_bb = e->dest;
19435 basic_block new_exit_bb;
19436 new_exit_bb = split_block_after_labels (latch_bb)->dest;
19437 loop->latch = latch_bb;
19438
19439 redirect_edge_succ (FALLTHRU_EDGE (latch_bb), body_bb);
19440
19441 make_edge (incr_bb, new_exit_bb, EDGE_FALSE_VALUE);
19442 /* The successor of incr_bb is already pointing to latch_bb; just
19443 change the flags.
19444 make_edge (incr_bb, latch_bb, EDGE_TRUE_VALUE); */
19445 FALLTHRU_EDGE (incr_bb)->flags = EDGE_TRUE_VALUE;
19446
19447 gphi *phi = create_phi_node (iter1, body_bb);
19448 edge preheader_edge = find_edge (entry_bb, body_bb);
19449 edge latch_edge = single_succ_edge (latch_bb);
19450 add_phi_arg (phi, build_zero_cst (unsigned_type_node), preheader_edge,
19451 UNKNOWN_LOCATION);
19452 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
19453
19454 /* Generate the new return. */
19455 gsi = gsi_last_bb (new_exit_bb);
19456 if (retval
19457 && TREE_CODE (retval) == VIEW_CONVERT_EXPR
19458 && TREE_CODE (TREE_OPERAND (retval, 0)) == RESULT_DECL)
19459 retval = TREE_OPERAND (retval, 0);
19460 else if (retval)
19461 {
19462 retval = build1 (VIEW_CONVERT_EXPR,
19463 TREE_TYPE (TREE_TYPE (node->decl)),
19464 retval);
19465 retval = force_gimple_operand_gsi (&gsi, retval, true, NULL,
19466 false, GSI_CONTINUE_LINKING);
19467 }
19468 g = gimple_build_return (retval);
19469 gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
19470
19471 /* Handle aligned clauses by replacing default defs of the aligned
19472 uniform args with __builtin_assume_aligned (arg_N(D), alignment)
19473 lhs. Handle linear by adding PHIs. */
19474 for (unsigned i = 0; i < node->simdclone->nargs; i++)
19475 if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
19476 && (TREE_ADDRESSABLE (node->simdclone->args[i].orig_arg)
19477 || !is_gimple_reg_type
19478 (TREE_TYPE (node->simdclone->args[i].orig_arg))))
19479 {
19480 tree orig_arg = node->simdclone->args[i].orig_arg;
19481 if (is_gimple_reg_type (TREE_TYPE (orig_arg)))
19482 iter1 = make_ssa_name (TREE_TYPE (orig_arg));
19483 else
19484 {
19485 iter1 = create_tmp_var_raw (TREE_TYPE (orig_arg));
19486 gimple_add_tmp_var (iter1);
19487 }
19488 gsi = gsi_after_labels (entry_bb);
19489 g = gimple_build_assign (iter1, orig_arg);
19490 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19491 gsi = gsi_after_labels (body_bb);
19492 g = gimple_build_assign (orig_arg, iter1);
19493 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19494 }
19495 else if (node->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM
19496 && DECL_BY_REFERENCE (node->simdclone->args[i].orig_arg)
19497 && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
19498 == REFERENCE_TYPE
19499 && TREE_ADDRESSABLE
19500 (TREE_TYPE (TREE_TYPE (node->simdclone->args[i].orig_arg))))
19501 {
19502 tree orig_arg = node->simdclone->args[i].orig_arg;
19503 tree def = ssa_default_def (cfun, orig_arg);
19504 if (def && !has_zero_uses (def))
19505 {
19506 iter1 = create_tmp_var_raw (TREE_TYPE (TREE_TYPE (orig_arg)));
19507 gimple_add_tmp_var (iter1);
19508 gsi = gsi_after_labels (entry_bb);
19509 g = gimple_build_assign (iter1, build_simple_mem_ref (def));
19510 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19511 gsi = gsi_after_labels (body_bb);
19512 g = gimple_build_assign (build_simple_mem_ref (def), iter1);
19513 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19514 }
19515 }
19516 else if (node->simdclone->args[i].alignment
19517 && node->simdclone->args[i].arg_type
19518 == SIMD_CLONE_ARG_TYPE_UNIFORM
19519 && (node->simdclone->args[i].alignment
19520 & (node->simdclone->args[i].alignment - 1)) == 0
19521 && TREE_CODE (TREE_TYPE (node->simdclone->args[i].orig_arg))
19522 == POINTER_TYPE)
19523 {
19524 unsigned int alignment = node->simdclone->args[i].alignment;
19525 tree orig_arg = node->simdclone->args[i].orig_arg;
19526 tree def = ssa_default_def (cfun, orig_arg);
19527 if (def && !has_zero_uses (def))
19528 {
19529 tree fn = builtin_decl_explicit (BUILT_IN_ASSUME_ALIGNED);
19530 gimple_seq seq = NULL;
19531 bool need_cvt = false;
19532 gcall *call
19533 = gimple_build_call (fn, 2, def, size_int (alignment));
19534 g = call;
19535 if (!useless_type_conversion_p (TREE_TYPE (orig_arg),
19536 ptr_type_node))
19537 need_cvt = true;
19538 tree t = make_ssa_name (need_cvt ? ptr_type_node : orig_arg);
19539 gimple_call_set_lhs (g, t);
19540 gimple_seq_add_stmt_without_update (&seq, g);
19541 if (need_cvt)
19542 {
19543 t = make_ssa_name (orig_arg);
19544 g = gimple_build_assign (t, NOP_EXPR, gimple_call_lhs (g));
19545 gimple_seq_add_stmt_without_update (&seq, g);
19546 }
19547 gsi_insert_seq_on_edge_immediate
19548 (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
19549
19550 entry_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
19551 int freq = compute_call_stmt_bb_frequency (current_function_decl,
19552 entry_bb);
19553 node->create_edge (cgraph_node::get_create (fn),
19554 call, entry_bb->count, freq);
19555
19556 imm_use_iterator iter;
19557 use_operand_p use_p;
19558 gimple *use_stmt;
19559 tree repl = gimple_get_lhs (g);
19560 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
19561 if (is_gimple_debug (use_stmt) || use_stmt == call)
19562 continue;
19563 else
19564 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
19565 SET_USE (use_p, repl);
19566 }
19567 }
19568 else if ((node->simdclone->args[i].arg_type
19569 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
19570 || (node->simdclone->args[i].arg_type
19571 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP)
19572 || (node->simdclone->args[i].arg_type
19573 == SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP)
19574 || (node->simdclone->args[i].arg_type
19575 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP))
19576 {
19577 tree orig_arg = node->simdclone->args[i].orig_arg;
19578 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
19579 || POINTER_TYPE_P (TREE_TYPE (orig_arg)));
19580 tree def = NULL_TREE;
19581 if (TREE_ADDRESSABLE (orig_arg))
19582 {
19583 def = make_ssa_name (TREE_TYPE (orig_arg));
19584 iter1 = make_ssa_name (TREE_TYPE (orig_arg));
19585 iter2 = make_ssa_name (TREE_TYPE (orig_arg));
19586 gsi = gsi_after_labels (entry_bb);
19587 g = gimple_build_assign (def, orig_arg);
19588 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19589 }
19590 else
19591 {
19592 def = ssa_default_def (cfun, orig_arg);
19593 if (!def || has_zero_uses (def))
19594 def = NULL_TREE;
19595 else
19596 {
19597 iter1 = make_ssa_name (orig_arg);
19598 iter2 = make_ssa_name (orig_arg);
19599 }
19600 }
19601 if (def)
19602 {
19603 phi = create_phi_node (iter1, body_bb);
19604 add_phi_arg (phi, def, preheader_edge, UNKNOWN_LOCATION);
19605 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
19606 enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
19607 ? PLUS_EXPR : POINTER_PLUS_EXPR;
19608 tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (orig_arg))
19609 ? TREE_TYPE (orig_arg) : sizetype;
19610 tree addcst = simd_clone_linear_addend (node, i, addtype,
19611 entry_bb);
19612 gsi = gsi_last_bb (incr_bb);
19613 g = gimple_build_assign (iter2, code, iter1, addcst);
19614 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19615
19616 imm_use_iterator iter;
19617 use_operand_p use_p;
19618 gimple *use_stmt;
19619 if (TREE_ADDRESSABLE (orig_arg))
19620 {
19621 gsi = gsi_after_labels (body_bb);
19622 g = gimple_build_assign (orig_arg, iter1);
19623 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
19624 }
19625 else
19626 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
19627 if (use_stmt == phi)
19628 continue;
19629 else
19630 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
19631 SET_USE (use_p, iter1);
19632 }
19633 }
19634 else if (node->simdclone->args[i].arg_type
19635 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
19636 || (node->simdclone->args[i].arg_type
19637 == SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP))
19638 {
19639 tree orig_arg = node->simdclone->args[i].orig_arg;
19640 tree def = ssa_default_def (cfun, orig_arg);
19641 gcc_assert (!TREE_ADDRESSABLE (orig_arg)
19642 && TREE_CODE (TREE_TYPE (orig_arg)) == REFERENCE_TYPE);
19643 if (def && !has_zero_uses (def))
19644 {
19645 tree rtype = TREE_TYPE (TREE_TYPE (orig_arg));
19646 iter1 = make_ssa_name (orig_arg);
19647 iter2 = make_ssa_name (orig_arg);
19648 tree iter3 = make_ssa_name (rtype);
19649 tree iter4 = make_ssa_name (rtype);
19650 tree iter5 = make_ssa_name (rtype);
19651 gsi = gsi_after_labels (entry_bb);
19652 gimple *load
19653 = gimple_build_assign (iter3, build_simple_mem_ref (def));
19654 gsi_insert_before (&gsi, load, GSI_NEW_STMT);
19655
19656 tree array = node->simdclone->args[i].simd_array;
19657 TREE_ADDRESSABLE (array) = 1;
19658 tree ptr = build_fold_addr_expr (array);
19659 phi = create_phi_node (iter1, body_bb);
19660 add_phi_arg (phi, ptr, preheader_edge, UNKNOWN_LOCATION);
19661 add_phi_arg (phi, iter2, latch_edge, UNKNOWN_LOCATION);
19662 g = gimple_build_assign (iter2, POINTER_PLUS_EXPR, iter1,
19663 TYPE_SIZE_UNIT (TREE_TYPE (iter3)));
19664 gsi = gsi_last_bb (incr_bb);
19665 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19666
19667 phi = create_phi_node (iter4, body_bb);
19668 add_phi_arg (phi, iter3, preheader_edge, UNKNOWN_LOCATION);
19669 add_phi_arg (phi, iter5, latch_edge, UNKNOWN_LOCATION);
19670 enum tree_code code = INTEGRAL_TYPE_P (TREE_TYPE (iter3))
19671 ? PLUS_EXPR : POINTER_PLUS_EXPR;
19672 tree addtype = INTEGRAL_TYPE_P (TREE_TYPE (iter3))
19673 ? TREE_TYPE (iter3) : sizetype;
19674 tree addcst = simd_clone_linear_addend (node, i, addtype,
19675 entry_bb);
19676 g = gimple_build_assign (iter5, code, iter4, addcst);
19677 gsi = gsi_last_bb (incr_bb);
19678 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19679
19680 g = gimple_build_assign (build_simple_mem_ref (iter1), iter4);
19681 gsi = gsi_after_labels (body_bb);
19682 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19683
19684 imm_use_iterator iter;
19685 use_operand_p use_p;
19686 gimple *use_stmt;
19687 FOR_EACH_IMM_USE_STMT (use_stmt, iter, def)
19688 if (use_stmt == load)
19689 continue;
19690 else
19691 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
19692 SET_USE (use_p, iter1);
19693
19694 if (!TYPE_READONLY (rtype))
19695 {
19696 tree v = make_ssa_name (rtype);
19697 tree aref = build4 (ARRAY_REF, rtype, array,
19698 size_zero_node, NULL_TREE,
19699 NULL_TREE);
19700 gsi = gsi_after_labels (new_exit_bb);
19701 g = gimple_build_assign (v, aref);
19702 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19703 g = gimple_build_assign (build_simple_mem_ref (def), v);
19704 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
19705 }
19706 }
19707 }
19708
19709 calculate_dominance_info (CDI_DOMINATORS);
19710 add_loop (loop, loop->header->loop_father);
19711 update_ssa (TODO_update_ssa);
19712
19713 pop_cfun ();
19714 }
19715
19716 /* If the function in NODE is tagged as an elemental SIMD function,
19717 create the appropriate SIMD clones. */
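/* For example (illustrative):
     #pragma omp declare simd simdlen(8) notinbranch
     int foo (int x);
   requests clones of simdlen 8; when neither inbranch nor notinbranch
   is specified, both an inbranch and a notinbranch clone are created
   for each ISA variant the target requests.  */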
19718
19719 static void
19720 expand_simd_clones (struct cgraph_node *node)
19721 {
19722 tree attr = lookup_attribute ("omp declare simd",
19723 DECL_ATTRIBUTES (node->decl));
19724 if (attr == NULL_TREE
19725 || node->global.inlined_to
19726 || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
19727 return;
19728
19729 /* Ignore
19730 #pragma omp declare simd
19731 extern int foo ();
19732 in C, since there we don't know the argument types at all. */
19733 if (!node->definition
19734 && TYPE_ARG_TYPES (TREE_TYPE (node->decl)) == NULL_TREE)
19735 return;
19736
19737 /* Call this before creating clone_info, as it might ggc_collect. */
19738 if (node->definition && node->has_gimple_body_p ())
19739 node->get_body ();
19740
19741 do
19742 {
19743 /* Start with parsing the "omp declare simd" attribute(s). */
19744 bool inbranch_clause_specified;
19745 struct cgraph_simd_clone *clone_info
19746 = simd_clone_clauses_extract (node, TREE_VALUE (attr),
19747 &inbranch_clause_specified);
19748 if (clone_info == NULL)
19749 continue;
19750
19751 int orig_simdlen = clone_info->simdlen;
19752 tree base_type = simd_clone_compute_base_data_type (node, clone_info);
19753 /* The target can return 0 (no simd clones should be created),
19754 1 (just one ISA of simd clones should be created) or a higher
19755 count of ISA variants. In the latter case, clone_info is initialized
19756 for the first ISA variant. */
19757 int count
19758 = targetm.simd_clone.compute_vecsize_and_simdlen (node, clone_info,
19759 base_type, 0);
19760 if (count == 0)
19761 continue;
19762
19763 /* Loop over all COUNT ISA variants, and if !INBRANCH_CLAUSE_SPECIFIED,
19764 also create one inbranch and one !inbranch clone of it. */
19765 for (int i = 0; i < count * 2; i++)
19766 {
19767 struct cgraph_simd_clone *clone = clone_info;
19768 if (inbranch_clause_specified && (i & 1) != 0)
19769 continue;
19770
19771 if (i != 0)
19772 {
19773 clone = simd_clone_struct_alloc (clone_info->nargs
19774 + ((i & 1) != 0));
19775 simd_clone_struct_copy (clone, clone_info);
19776 /* Undo changes targetm.simd_clone.compute_vecsize_and_simdlen
19777 and simd_clone_adjust_argument_types did to the first
19778 clone's info. */
19779 clone->nargs -= clone_info->inbranch;
19780 clone->simdlen = orig_simdlen;
19781 /* And call the target hook again to get the right ISA. */
19782 targetm.simd_clone.compute_vecsize_and_simdlen (node, clone,
19783 base_type,
19784 i / 2);
19785 if ((i & 1) != 0)
19786 clone->inbranch = 1;
19787 }
19788
19789 /* simd_clone_mangle might fail if such a clone has been created
19790 already. */
19791 tree id = simd_clone_mangle (node, clone);
19792 if (id == NULL_TREE)
19793 continue;
19794
19795 /* Only when we are sure we want to create the clone do we actually
19796 clone the function (for definitions) or create another
19797 extern FUNCTION_DECL (for prototypes without definitions). */
19798 struct cgraph_node *n = simd_clone_create (node);
19799 if (n == NULL)
19800 continue;
19801
19802 n->simdclone = clone;
19803 clone->origin = node;
19804 clone->next_clone = NULL;
19805 if (node->simd_clones == NULL)
19806 {
19807 clone->prev_clone = n;
19808 node->simd_clones = n;
19809 }
19810 else
19811 {
19812 clone->prev_clone = node->simd_clones->simdclone->prev_clone;
19813 clone->prev_clone->simdclone->next_clone = n;
19814 node->simd_clones->simdclone->prev_clone = n;
19815 }
19816 symtab->change_decl_assembler_name (n->decl, id);
19817 /* And finally adjust the return type and parameters and, for
19818 definitions, also the function body. */
19819 if (node->definition)
19820 simd_clone_adjust (n);
19821 else
19822 {
19823 simd_clone_adjust_return_type (n);
19824 simd_clone_adjust_argument_types (n);
19825 }
19826 }
19827 }
19828 while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
19829 }
19830
19831 /* Entry point for IPA simd clone creation pass. */
19832
19833 static unsigned int
19834 ipa_omp_simd_clone (void)
19835 {
19836 struct cgraph_node *node;
19837 FOR_EACH_FUNCTION (node)
19838 expand_simd_clones (node);
19839 return 0;
19840 }
19841
19842 namespace {
19843
19844 const pass_data pass_data_omp_simd_clone =
19845 {
19846 SIMPLE_IPA_PASS, /* type */
19847 "simdclone", /* name */
19848 OPTGROUP_NONE, /* optinfo_flags */
19849 TV_NONE, /* tv_id */
19850 ( PROP_ssa | PROP_cfg ), /* properties_required */
19851 0, /* properties_provided */
19852 0, /* properties_destroyed */
19853 0, /* todo_flags_start */
19854 0, /* todo_flags_finish */
19855 };
19856
19857 class pass_omp_simd_clone : public simple_ipa_opt_pass
19858 {
19859 public:
19860 pass_omp_simd_clone(gcc::context *ctxt)
19861 : simple_ipa_opt_pass(pass_data_omp_simd_clone, ctxt)
19862 {}
19863
19864 /* opt_pass methods: */
19865 virtual bool gate (function *);
19866 virtual unsigned int execute (function *) { return ipa_omp_simd_clone (); }
19867 };
19868
19869 bool
19870 pass_omp_simd_clone::gate (function *)
19871 {
19872 return targetm.simd_clone.compute_vecsize_and_simdlen != NULL;
19873 }
19874
19875 } // anon namespace
19876
19877 simple_ipa_opt_pass *
19878 make_pass_omp_simd_clone (gcc::context *ctxt)
19879 {
19880 return new pass_omp_simd_clone (ctxt);
19881 }
19882
19883 /* Helper function for omp_finish_file routine. Takes decls from V_DECLS and
19884 adds their addresses and sizes to constructor-vector V_CTOR. */
19885 static void
19886 add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
19887 vec<constructor_elt, va_gc> *v_ctor)
19888 {
19889 unsigned len = vec_safe_length (v_decls);
19890 for (unsigned i = 0; i < len; i++)
19891 {
19892 tree it = (*v_decls)[i];
19893 bool is_var = TREE_CODE (it) == VAR_DECL;
19894 bool is_link_var
19895 = is_var
19896 #ifdef ACCEL_COMPILER
19897 && DECL_HAS_VALUE_EXPR_P (it)
19898 #endif
19899 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));
19900
19901 tree size = NULL_TREE;
19902 if (is_var)
19903 size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));
19904
19905 tree addr;
19906 if (!is_link_var)
19907 addr = build_fold_addr_expr (it);
19908 else
19909 {
19910 #ifdef ACCEL_COMPILER
19911 /* For "omp declare target link" vars add address of the pointer to
19912 the target table, instead of address of the var. */
19913 tree value_expr = DECL_VALUE_EXPR (it);
19914 tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
19915 varpool_node::finalize_decl (link_ptr_decl);
19916 addr = build_fold_addr_expr (link_ptr_decl);
19917 #else
19918 addr = build_fold_addr_expr (it);
19919 #endif
19920
19921 /* Most significant bit of the size marks "omp declare target link"
19922 vars in host and target tables. */
19923 unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
19924 isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
19925 * BITS_PER_UNIT - 1);
19926 size = wide_int_to_tree (const_ptr_type_node, isize);
19927 }
19928
19929 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
19930 if (is_var)
19931 CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
19932 }
19933 }
19934
19935 /* Create new symbols containing (address, size) pairs for global variables,
19936 marked with "omp declare target" attribute, as well as addresses for the
19937 functions, which are outlined offloading regions. */
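/* With named section support the data is emitted as two arrays of
   pointer-sized integers: .offload_func_table holds one address per
   offloaded function, while .offload_var_table holds an (address, size)
   pair per variable.  */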
19938 void
19939 omp_finish_file (void)
19940 {
19941 unsigned num_funcs = vec_safe_length (offload_funcs);
19942 unsigned num_vars = vec_safe_length (offload_vars);
19943
19944 if (num_funcs == 0 && num_vars == 0)
19945 return;
19946
19947 if (targetm_common.have_named_sections)
19948 {
19949 vec<constructor_elt, va_gc> *v_f, *v_v;
19950 vec_alloc (v_f, num_funcs);
19951 vec_alloc (v_v, num_vars * 2);
19952
19953 add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
19954 add_decls_addresses_to_decl_constructor (offload_vars, v_v);
19955
19956 tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
19957 num_vars * 2);
19958 tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
19959 num_funcs);
19960 TYPE_ALIGN (vars_decl_type) = TYPE_ALIGN (pointer_sized_int_node);
19961 TYPE_ALIGN (funcs_decl_type) = TYPE_ALIGN (pointer_sized_int_node);
19962 tree ctor_v = build_constructor (vars_decl_type, v_v);
19963 tree ctor_f = build_constructor (funcs_decl_type, v_f);
19964 TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
19965 TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
19966 tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
19967 get_identifier (".offload_func_table"),
19968 funcs_decl_type);
19969 tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
19970 get_identifier (".offload_var_table"),
19971 vars_decl_type);
19972 TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
19973 /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
19974 otherwise a joint table in a binary will contain padding between
19975 tables from multiple object files. */
19976 DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
19977 DECL_ALIGN (funcs_decl) = TYPE_ALIGN (funcs_decl_type);
19978 DECL_ALIGN (vars_decl) = TYPE_ALIGN (vars_decl_type);
19979 DECL_INITIAL (funcs_decl) = ctor_f;
19980 DECL_INITIAL (vars_decl) = ctor_v;
19981 set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
19982 set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);
19983
19984 varpool_node::finalize_decl (vars_decl);
19985 varpool_node::finalize_decl (funcs_decl);
19986 }
19987 else
19988 {
19989 for (unsigned i = 0; i < num_funcs; i++)
19990 {
19991 tree it = (*offload_funcs)[i];
19992 targetm.record_offload_symbol (it);
19993 }
19994 for (unsigned i = 0; i < num_vars; i++)
19995 {
19996 tree it = (*offload_vars)[i];
19997 targetm.record_offload_symbol (it);
19998 }
19999 }
20000 }
20001
20002 /* Find the number of threads (POS = false), or thread number (POS =
20003 true) for an OpenACC region partitioned as MASK. Setup code
20004 required for the calculation is added to SEQ. */
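/* For example, with MASK covering the worker and vector dimensions,
   POS = false yields num_workers * vector_length, while POS = true
   yields the linearized position worker_pos * vector_length
   + vector_pos.  */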
20005
20006 static tree
20007 oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
20008 {
20009 tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
20010 unsigned ix;
20011
20012 /* Start at gang level, and examine relevant dimension indices. */
20013 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
20014 if (GOMP_DIM_MASK (ix) & mask)
20015 {
20016 tree arg = build_int_cst (unsigned_type_node, ix);
20017
20018 if (res)
20019 {
20020 /* We had an outer index, so scale that by the size of
20021 this dimension. */
20022 tree n = create_tmp_var (integer_type_node);
20023 gimple *call
20024 = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg);
20025
20026 gimple_call_set_lhs (call, n);
20027 gimple_seq_add_stmt (seq, call);
20028 res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
20029 }
20030 if (pos)
20031 {
20032 /* Determine index in this dimension. */
20033 tree id = create_tmp_var (integer_type_node);
20034 gimple *call = gimple_build_call_internal
20035 (IFN_GOACC_DIM_POS, 1, arg);
20036
20037 gimple_call_set_lhs (call, id);
20038 gimple_seq_add_stmt (seq, call);
20039 if (res)
20040 res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
20041 else
20042 res = id;
20043 }
20044 }
20045
20046 if (res == NULL_TREE)
20047 res = integer_zero_node;
20048
20049 return res;
20050 }
20051
20052 /* Transform IFN_GOACC_LOOP calls to actual code. See
20053 expand_oacc_for for where these are generated. At the vector
20054 level, we stride loops, such that each member of a warp will
20055 operate on adjacent iterations. At the worker and gang level,
20056 each gang/warp executes a set of contiguous iterations. Chunking
20057 can override this such that each iteration engine executes a
20058 contiguous chunk, and then moves on to stride to the next chunk. */
20059
20060 static void
20061 oacc_xform_loop (gcall *call)
20062 {
20063 gimple_stmt_iterator gsi = gsi_for_stmt (call);
20064 enum ifn_goacc_loop_kind code
20065 = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
20066 tree dir = gimple_call_arg (call, 1);
20067 tree range = gimple_call_arg (call, 2);
20068 tree step = gimple_call_arg (call, 3);
20069 tree chunk_size = NULL_TREE;
20070 unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
20071 tree lhs = gimple_call_lhs (call);
20072 tree type = TREE_TYPE (lhs);
20073 tree diff_type = TREE_TYPE (range);
20074 tree r = NULL_TREE;
20075 gimple_seq seq = NULL;
20076 bool chunking = false, striding = true;
20077 unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
20078 unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
20079
20080 #ifdef ACCEL_COMPILER
20081 chunk_size = gimple_call_arg (call, 4);
20082 if (integer_minus_onep (chunk_size) /* Force static allocation. */
20083 || integer_zerop (chunk_size)) /* Default (also static). */
20084 {
20085 /* If we're at the gang level, we want each to execute a
20086 contiguous run of iterations. Otherwise we want each element
20087 to stride. */
20088 striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
20089 chunking = false;
20090 }
20091 else
20092 {
20093 /* Chunk of size 1 is striding. */
20094 striding = integer_onep (chunk_size);
20095 chunking = !striding;
20096 }
20097 #endif
20098
20099 /* striding=true, chunking=true
20100 -> invalid.
20101 striding=true, chunking=false
20102 -> chunks=1
20103 striding=false,chunking=true
20104 -> chunks=ceil (range/(chunksize*threads*step))
20105 striding=false,chunking=false
20106 -> chunk_size=ceil(range/(threads*step)),chunks=1 */
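/* Illustrative example: with range=1024, step=1, dir=1 and 32 threads
   in the selected dimensions, the striding=false, chunking=false case
   computes chunk_size = ceil (1024 / 32) = 32, so each thread handles
   one contiguous run of 32 iterations; with striding=true each thread
   instead visits every 32nd iteration.  */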
20107 push_gimplify_context (true);
20108
20109 switch (code)
20110 {
20111 default: gcc_unreachable ();
20112
20113 case IFN_GOACC_LOOP_CHUNKS:
20114 if (!chunking)
20115 r = build_int_cst (type, 1);
20116 else
20117 {
20118 /* chunk_max
20119 = (range - dir) / (chunks * step * num_threads) + dir */
20120 tree per = oacc_thread_numbers (false, mask, &seq);
20121 per = fold_convert (type, per);
20122 chunk_size = fold_convert (type, chunk_size);
20123 per = fold_build2 (MULT_EXPR, type, per, chunk_size);
20124 per = fold_build2 (MULT_EXPR, type, per, step);
20125 r = build2 (MINUS_EXPR, type, range, dir);
20126 r = build2 (PLUS_EXPR, type, r, per);
20127 r = build2 (TRUNC_DIV_EXPR, type, r, per);
20128 }
20129 break;
20130
20131 case IFN_GOACC_LOOP_STEP:
20132 {
20133 /* If striding, step by the entire compute volume, otherwise
20134 step by the inner volume. */
20135 unsigned volume = striding ? mask : inner_mask;
20136
20137 r = oacc_thread_numbers (false, volume, &seq);
20138 r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
20139 }
20140 break;
20141
20142 case IFN_GOACC_LOOP_OFFSET:
20143 if (striding)
20144 {
20145 r = oacc_thread_numbers (true, mask, &seq);
20146 r = fold_convert (diff_type, r);
20147 }
20148 else
20149 {
20150 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
20151 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
20152 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
20153 inner_size, outer_size);
20154
20155 volume = fold_convert (diff_type, volume);
20156 if (chunking)
20157 chunk_size = fold_convert (diff_type, chunk_size);
20158 else
20159 {
20160 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
20161
20162 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
20163 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
20164 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
20165 }
20166
20167 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
20168 fold_convert (diff_type, inner_size));
20169 r = oacc_thread_numbers (true, outer_mask, &seq);
20170 r = fold_convert (diff_type, r);
20171 r = build2 (MULT_EXPR, diff_type, r, span);
20172
20173 tree inner = oacc_thread_numbers (true, inner_mask, &seq);
20174 inner = fold_convert (diff_type, inner);
20175 r = fold_build2 (PLUS_EXPR, diff_type, r, inner);
20176
20177 if (chunking)
20178 {
20179 tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
20180 tree per
20181 = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
20182 per = build2 (MULT_EXPR, diff_type, per, chunk);
20183
20184 r = build2 (PLUS_EXPR, diff_type, r, per);
20185 }
20186 }
20187 r = fold_build2 (MULT_EXPR, diff_type, r, step);
20188 if (type != diff_type)
20189 r = fold_convert (type, r);
20190 break;
20191
20192 case IFN_GOACC_LOOP_BOUND:
20193 if (striding)
20194 r = range;
20195 else
20196 {
20197 tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
20198 tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
20199 tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
20200 inner_size, outer_size);
20201
20202 volume = fold_convert (diff_type, volume);
20203 if (chunking)
20204 chunk_size = fold_convert (diff_type, chunk_size);
20205 else
20206 {
20207 tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
20208
20209 chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
20210 chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
20211 chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
20212 }
20213
20214 tree span = build2 (MULT_EXPR, diff_type, chunk_size,
20215 fold_convert (diff_type, inner_size));
20216
20217 r = fold_build2 (MULT_EXPR, diff_type, span, step);
20218
20219 tree offset = gimple_call_arg (call, 6);
20220 r = build2 (PLUS_EXPR, diff_type, r,
20221 fold_convert (diff_type, offset));
20222 r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
20223 diff_type, r, range);
20224 }
20225 if (diff_type != type)
20226 r = fold_convert (type, r);
20227 break;
20228 }
20229
20230 gimplify_assign (lhs, r, &seq);
20231
20232 pop_gimplify_context (NULL);
20233
20234 gsi_replace_with_seq (&gsi, seq, true);
20235 }
20236
20237 /* Default partitioned and minimum partitioned dimensions. */
20238
20239 static int oacc_default_dims[GOMP_DIM_MAX];
20240 static int oacc_min_dims[GOMP_DIM_MAX];
20241
20242 /* Parse the default dimension parameter. This is a set of
20243 :-separated optional compute dimensions. Each specified dimension
20244 is a positive integer. When device type support is added, it is
20245 planned to be a comma-separated list of such compute dimensions,
20246 with all but the first prefixed by the colon-terminated device
20247 type. */
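/* For example, -fopenacc-dim=32:4:128 sets defaults of 32 gangs, 4
   workers and a vector length of 128; a dimension may be left empty,
   as in -fopenacc-dim=::128, to keep its built-in default.  */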
20248
20249 static void
20250 oacc_parse_default_dims (const char *dims)
20251 {
20252 int ix;
20253
20254 for (ix = GOMP_DIM_MAX; ix--;)
20255 {
20256 oacc_default_dims[ix] = -1;
20257 oacc_min_dims[ix] = 1;
20258 }
20259
20260 #ifndef ACCEL_COMPILER
20261 /* Cannot be overridden on the host. */
20262 dims = NULL;
20263 #endif
20264 if (dims)
20265 {
20266 const char *pos = dims;
20267
20268 for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
20269 {
20270 if (ix)
20271 {
20272 if (*pos != ':')
20273 goto malformed;
20274 pos++;
20275 }
20276
20277 if (*pos != ':')
20278 {
20279 long val;
20280 const char *eptr;
20281
20282 errno = 0;
20283 val = strtol (pos, CONST_CAST (char **, &eptr), 10);
20284 if (errno || val <= 0 || (int) val != val)
20285 goto malformed;
20286 pos = eptr;
20287 oacc_default_dims[ix] = (int) val;
20288 }
20289 }
20290 if (*pos)
20291 {
20292 malformed:
20293 error_at (UNKNOWN_LOCATION,
20294 "-fopenacc-dim operand is malformed at '%s'", pos);
20295 }
20296 }
20297
20298 /* Allow the backend to validate the dimensions. */
20299 targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
20300 targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
20301 }
20302
20303 /* Validate and update the dimensions for offloaded FN. ATTRS is the
20304 raw attribute. DIMS is an array of dimensions, which is filled in.
20305 LEVEL is the partitioning level of a routine, or -1 for an offload
20306 region itself. USED is the mask of partitioned execution in the
20307 function. */
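/* For instance, an offload region whose loops use only vector
   partitioning gets the global/user default for the vector dimension,
   but the minimum size (normally 1) for the unused gang and worker
   dimensions, avoiding pointless redundant execution there.  */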
20308
20309 static void
20310 oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
20311 {
20312 tree purpose[GOMP_DIM_MAX];
20313 unsigned ix;
20314 tree pos = TREE_VALUE (attrs);
20315 bool is_kernel = oacc_fn_attrib_kernels_p (attrs);
20316
20317 /* Make sure the attribute creator attached the dimension
20318 information. */
20319 gcc_assert (pos);
20320
20321 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
20322 {
20323 purpose[ix] = TREE_PURPOSE (pos);
20324 tree val = TREE_VALUE (pos);
20325 dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
20326 pos = TREE_CHAIN (pos);
20327 }
20328
20329 bool changed = targetm.goacc.validate_dims (fn, dims, level);
20330
20331 /* Default anything left to 1 or a partitioned default. */
20332 for (ix = 0; ix != GOMP_DIM_MAX; ix++)
20333 if (dims[ix] < 0)
20334 {
20335 /* The OpenACC spec says 'If the [num_gangs] clause is not
20336 specified, an implementation-defined default will be used;
20337 the default may depend on the code within the construct.'
20338 (2.5.6). Thus an implementation is free to choose
20339 non-unity default for a parallel region that doesn't have
20340 any gang-partitioned loops. However, it appears that there
20341 is a sufficient body of user code that expects non-gang
20342 partitioned regions to not execute in gang-redundant mode.
20343 So we (a) don't warn about the non-portability and (b) pick
20344 the minimum permissible dimension size when there is no
20345 partitioned execution. Otherwise we pick the global
20346 default for the dimension, which the user can control. The
20347 same wording and logic applies to num_workers and
20348 vector_length, however the worker- or vector- single
20349 execution doesn't have the same impact as gang-redundant
20350 execution. (If the minimum gang-level partitioning is not 1,
20351 the target is probably too confusing.) */
20352 dims[ix] = (used & GOMP_DIM_MASK (ix)
20353 ? oacc_default_dims[ix] : oacc_min_dims[ix]);
20354 changed = true;
20355 }
20356
20357 if (changed)
20358 {
20359 /* Replace the attribute with new values. */
20360 pos = NULL_TREE;
20361 for (ix = GOMP_DIM_MAX; ix--;)
20362 {
20363 pos = tree_cons (purpose[ix],
20364 build_int_cst (integer_type_node, dims[ix]),
20365 pos);
20366 if (is_kernel)
20367 TREE_PUBLIC (pos) = 1;
20368 }
20369 replace_oacc_fn_attrib (fn, pos);
20370 }
20371 }
20372
20373 /* Create an empty OpenACC loop structure at LOC. */
20374
20375 static oacc_loop *
20376 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
20377 {
20378 oacc_loop *loop = XCNEW (oacc_loop);
20379
20380 loop->parent = parent;
20381 loop->child = loop->sibling = NULL;
20382
20383 if (parent)
20384 {
20385 loop->sibling = parent->child;
20386 parent->child = loop;
20387 }
20388
20389 loop->loc = loc;
20390 loop->marker = NULL;
20391 memset (loop->heads, 0, sizeof (loop->heads));
20392 memset (loop->tails, 0, sizeof (loop->tails));
20393 loop->routine = NULL_TREE;
20394
20395 loop->mask = loop->flags = 0;
20396 loop->chunk_size = 0;
20397 loop->head_end = NULL;
20398
20399 return loop;
20400 }
20401
20402 /* Create an outermost, dummy OpenACC loop for offloaded function
20403 DECL. */
20404
20405 static oacc_loop *
20406 new_oacc_loop_outer (tree decl)
20407 {
20408 return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
20409 }
20410
20411 /* Start a new OpenACC loop structure beginning at head marker HEAD.
20412 Link into PARENT loop. Return the new loop. */
20413
20414 static oacc_loop *
20415 new_oacc_loop (oacc_loop *parent, gcall *marker)
20416 {
20417 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));
20418
20419 loop->marker = marker;
20420
20421 /* TODO: This is where device_type flattening would occur for the loop
20422 flags. */
20423
20424 loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));
20425
20426 tree chunk_size = integer_zero_node;
20427 if (loop->flags & OLF_GANG_STATIC)
20428 chunk_size = gimple_call_arg (marker, 4);
20429 loop->chunk_size = chunk_size;
20430
20431 return loop;
20432 }
20433
20434 /* Create a dummy loop encompassing a call to an OpenACC routine.
20435 Extract the routine's partitioning requirements. */
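/* E.g. a routine declared "#pragma acc routine worker" (LEVEL ==
   GOMP_DIM_WORKER) yields a mask covering the worker and vector
   dimensions, which enclosing loops at the call site may not use
   themselves.  */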
20436
20437 static void
20438 new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
20439 {
20440 oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
20441 int level = oacc_fn_attrib_level (attrs);
20442
20443 gcc_assert (level >= 0);
20444
20445 loop->marker = call;
20446 loop->routine = decl;
20447 loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
20448 ^ (GOMP_DIM_MASK (level) - 1));
20449 }
20450
20451 /* Finish off the current OpenACC loop ending at tail marker TAIL.
20452 Return the parent loop. */
20453
20454 static oacc_loop *
20455 finish_oacc_loop (oacc_loop *loop)
20456 {
20457 return loop->parent;
20458 }
20459
20460 /* Free all OpenACC loop structures within LOOP (inclusive). */
20461
20462 static void
20463 free_oacc_loop (oacc_loop *loop)
20464 {
20465 if (loop->sibling)
20466 free_oacc_loop (loop->sibling);
20467 if (loop->child)
20468 free_oacc_loop (loop->child);
20469
20470 free (loop);
20471 }
20472
20473 /* Dump out the OpenACC loop head or tail beginning at FROM. */
20474
20475 static void
20476 dump_oacc_loop_part (FILE *file, gcall *from, int depth,
20477 const char *title, int level)
20478 {
20479 enum ifn_unique_kind kind
20480 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
20481
20482 fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
20483 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
20484 {
20485 gimple *stmt = gsi_stmt (gsi);
20486
20487 if (is_gimple_call (stmt)
20488 && gimple_call_internal_p (stmt)
20489 && gimple_call_internal_fn (stmt) == IFN_UNIQUE)
20490 {
20491 enum ifn_unique_kind k
20492 = ((enum ifn_unique_kind) TREE_INT_CST_LOW
20493 (gimple_call_arg (stmt, 0)));
20494
20495 if (k == kind && stmt != from)
20496 break;
20497 }
20498 print_gimple_stmt (file, stmt, depth * 2 + 2, 0);
20499
20500 gsi_next (&gsi);
20501 while (gsi_end_p (gsi))
20502 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
20503 }
20504 }
20505
20506 /* Dump OpenACC loops LOOP, its siblings and its children. */
20507
20508 static void
20509 dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
20510 {
20511 int ix;
20512
20513 fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
20514 loop->flags, loop->mask,
20515 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));
20516
20517 if (loop->marker)
20518 print_gimple_stmt (file, loop->marker, depth * 2, 0);
20519
20520 if (loop->routine)
20521 fprintf (file, "%*sRoutine %s:%u:%s\n",
20522 depth * 2, "", DECL_SOURCE_FILE (loop->routine),
20523 DECL_SOURCE_LINE (loop->routine),
20524 IDENTIFIER_POINTER (DECL_NAME (loop->routine)));
20525
20526 for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
20527 if (loop->heads[ix])
20528 dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
20529 for (ix = GOMP_DIM_MAX; ix--;)
20530 if (loop->tails[ix])
20531 dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);
20532
20533 if (loop->child)
20534 dump_oacc_loop (file, loop->child, depth + 1);
20535 if (loop->sibling)
20536 dump_oacc_loop (file, loop->sibling, depth);
20537 }
20538
20539 void debug_oacc_loop (oacc_loop *);
20540
20541 /* Dump loops to stderr. */
20542
20543 DEBUG_FUNCTION void
20544 debug_oacc_loop (oacc_loop *loop)
20545 {
20546 dump_oacc_loop (stderr, loop, 0);
20547 }
20548
20549 /* DFS walk of basic blocks BB onwards, creating OpenACC loop
20550 structures as we go. By construction these loops are properly
20551 nested. */
20552
20553 static void
20554 oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
20555 {
20556 int marker = 0;
20557 int remaining = 0;
20558
20559 if (bb->flags & BB_VISITED)
20560 return;
20561
20562 follow:
20563 bb->flags |= BB_VISITED;
20564
20565 /* Scan for loop markers. */
20566 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
20567 gsi_next (&gsi))
20568 {
20569 gimple *stmt = gsi_stmt (gsi);
20570
20571 if (!is_gimple_call (stmt))
20572 continue;
20573
20574 gcall *call = as_a <gcall *> (stmt);
20575
20576 /* If this is a routine, make a dummy loop for it. */
20577 if (tree decl = gimple_call_fndecl (call))
20578 if (tree attrs = get_oacc_fn_attrib (decl))
20579 {
20580 gcc_assert (!marker);
20581 new_oacc_loop_routine (loop, call, decl, attrs);
20582 }
20583
20584 if (!gimple_call_internal_p (call))
20585 continue;
20586
20587 if (gimple_call_internal_fn (call) != IFN_UNIQUE)
20588 continue;
20589
20590 enum ifn_unique_kind kind
20591 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
20592 if (kind == IFN_UNIQUE_OACC_HEAD_MARK
20593 || kind == IFN_UNIQUE_OACC_TAIL_MARK)
20594 {
20595 if (gimple_call_num_args (call) == 2)
20596 {
20597 gcc_assert (marker && !remaining);
20598 marker = 0;
20599 if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
20600 loop = finish_oacc_loop (loop);
20601 else
20602 loop->head_end = call;
20603 }
20604 else
20605 {
20606 int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
20607
20608 if (!marker)
20609 {
20610 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
20611 loop = new_oacc_loop (loop, call);
20612 remaining = count;
20613 }
20614 gcc_assert (count == remaining);
20615 if (remaining)
20616 {
20617 remaining--;
20618 if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
20619 loop->heads[marker] = call;
20620 else
20621 loop->tails[remaining] = call;
20622 }
20623 marker++;
20624 }
20625 }
20626 }
20627 if (remaining || marker)
20628 {
20629 bb = single_succ (bb);
20630 gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
20631 goto follow;
20632 }
20633
20634 /* Walk successor blocks. */
20635 edge e;
20636 edge_iterator ei;
20637
20638 FOR_EACH_EDGE (e, ei, bb->succs)
20639 oacc_loop_discover_walk (loop, e->dest);
20640 }
20641
20642 /* LOOP is the first sibling. Reverse the order in place and return
20643 the new first sibling. Recurse to child loops. */
20644
20645 static oacc_loop *
20646 oacc_loop_sibling_nreverse (oacc_loop *loop)
20647 {
20648 oacc_loop *last = NULL;
20649 do
20650 {
20651 if (loop->child)
20652 loop->child = oacc_loop_sibling_nreverse (loop->child);
20653
20654 oacc_loop *next = loop->sibling;
20655 loop->sibling = last;
20656 last = loop;
20657 loop = next;
20658 }
20659 while (loop);
20660
20661 return last;
20662 }
20663
20664 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
20665 the current function. */
20666
20667 static oacc_loop *
20668 oacc_loop_discovery ()
20669 {
20670 basic_block bb;
20671
20672 oacc_loop *top = new_oacc_loop_outer (current_function_decl);
20673 oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
20674
20675 /* The siblings were constructed in reverse order; reverse them so
20676 that diagnostics come out in an unsurprising order. */
20677 top = oacc_loop_sibling_nreverse (top);
20678
20679 /* Reset the visited flags. */
20680 FOR_ALL_BB_FN (bb, cfun)
20681 bb->flags &= ~BB_VISITED;
20682
20683 return top;
20684 }
20685
20686 /* Transform the abstract internal function markers starting at FROM
20687 to be for partitioning level LEVEL. Stop when we meet another HEAD
20688 or TAIL marker. */
20689
20690 static void
20691 oacc_loop_xform_head_tail (gcall *from, int level)
20692 {
20693 enum ifn_unique_kind kind
20694 = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
20695 tree replacement = build_int_cst (unsigned_type_node, level);
20696
20697 for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
20698 {
20699 gimple *stmt = gsi_stmt (gsi);
20700
20701 if (is_gimple_call (stmt)
20702 && gimple_call_internal_p (stmt)
20703 && gimple_call_internal_fn (stmt) == IFN_UNIQUE)
20704 {
20705 enum ifn_unique_kind k
20706 = ((enum ifn_unique_kind)
20707 TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
20708
20709 if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
20710 *gimple_call_arg_ptr (stmt, 2) = replacement;
20711 else if (k == kind && stmt != from)
20712 break;
20713 }
20714 else if (is_gimple_call (stmt)
20715 && gimple_call_internal_p (stmt)
20716 && gimple_call_internal_fn (stmt) == IFN_GOACC_REDUCTION)
20717 *gimple_call_arg_ptr (stmt, 3) = replacement;
20718
20719 gsi_next (&gsi);
20720 while (gsi_end_p (gsi))
20721 gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
20722 }
20723 }
20724
20725 /* Transform the IFN_GOACC_LOOP internal functions by providing the
20726 determined partitioning mask and chunking argument. */
20727
20728 static void
20729 oacc_loop_xform_loop (gcall *end_marker, tree mask_arg, tree chunk_arg)
20730 {
20731 gimple_stmt_iterator gsi = gsi_for_stmt (end_marker);
20732
20733 for (;;)
20734 {
20735 for (; !gsi_end_p (gsi); gsi_next (&gsi))
20736 {
20737 gimple *stmt = gsi_stmt (gsi);
20738
20739 if (!is_gimple_call (stmt))
20740 continue;
20741
20742 gcall *call = as_a <gcall *> (stmt);
20743
20744 if (!gimple_call_internal_p (call))
20745 continue;
20746
20747 if (gimple_call_internal_fn (call) != IFN_GOACC_LOOP)
20748 continue;
20749
20750 *gimple_call_arg_ptr (call, 5) = mask_arg;
20751 *gimple_call_arg_ptr (call, 4) = chunk_arg;
20752 if (TREE_INT_CST_LOW (gimple_call_arg (call, 0))
20753 == IFN_GOACC_LOOP_BOUND)
20754 return;
20755 }
20756
20757 /* If we didn't see LOOP_BOUND, it should be in the single
20758 successor block. */
20759 basic_block bb = single_succ (gsi_bb (gsi));
20760 gsi = gsi_start_bb (bb);
20761 }
20762 }
20763
20764 /* Process the discovered OpenACC loops, setting the correct
20765 partitioning level etc. */
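/* As a sketch of the walk below: for each bit set in LOOP->mask,
   outermost dimension first, the Ith head/tail marker sequences
   (LOOP->heads[I] and LOOP->tails[I]) are rewritten for that dimension,
   and the IFN_GOACC_LOOP calls reached from LOOP->head_end receive the
   full mask and the chunk size.  */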
20766
20767 static void
20768 oacc_loop_process (oacc_loop *loop)
20769 {
20770 if (loop->child)
20771 oacc_loop_process (loop->child);
20772
20773 if (loop->mask && !loop->routine)
20774 {
20775 int ix;
20776 unsigned mask = loop->mask;
20777 unsigned dim = GOMP_DIM_GANG;
20778 tree mask_arg = build_int_cst (unsigned_type_node, mask);
20779 tree chunk_arg = loop->chunk_size;
20780
20781 oacc_loop_xform_loop (loop->head_end, mask_arg, chunk_arg);
20782
20783 for (ix = 0; ix != GOMP_DIM_MAX && loop->heads[ix]; ix++)
20784 {
20785 gcc_assert (mask);
20786
20787 while (!(GOMP_DIM_MASK (dim) & mask))
20788 dim++;
20789
20790 oacc_loop_xform_head_tail (loop->heads[ix], dim);
20791 oacc_loop_xform_head_tail (loop->tails[ix], dim);
20792
20793 mask ^= GOMP_DIM_MASK (dim);
20794 }
20795 }
20796
20797 if (loop->sibling)
20798 oacc_loop_process (loop->sibling);
20799 }
20800
20801 /* Walk the OpenACC loop hierarchy checking and assigning the
20802 programmer-specified partitionings. OUTER_MASK is the partitioning
20803 this loop is contained within. Return mask of partitioning
20804 encountered. If any auto loops are discovered, set GOMP_DIM_MAX
20805 bit. */
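/* For illustration, assuming the gomp-constants.h encoding where
   GOMP_DIM_GANG = 0, GOMP_DIM_WORKER = 1 and GOMP_DIM_VECTOR = 2 (so
   gang is the lowest mask bit):

     - a loop requesting a level already present in OUTER_MASK, e.g.
       worker inside a worker-partitioned loop, is diagnosed as reusing
       the containing loop's parallelism and that bit is dropped;

     - a loop whose coarsest requested level is coarser than a level
       already used by a containing loop, e.g. gang (bit 0) inside a
       worker-partitioned loop (bit 1), is diagnosed as incorrectly
       nested and that bit is dropped.  */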
20806
20807 static unsigned
20808 oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
20809 {
20810 unsigned this_mask = loop->mask;
20811 unsigned mask_all = 0;
20812 bool noisy = true;
20813
20814 #ifdef ACCEL_COMPILER
20815   /* The device compiler stays quiet for now; once device_type is supported
20816      and loop parameters can be device_type-specific, it will need to diagnose them itself.  */
20817 noisy = false;
20818 #endif
20819
20820 if (!loop->routine)
20821 {
20822 bool auto_par = (loop->flags & OLF_AUTO) != 0;
20823 bool seq_par = (loop->flags & OLF_SEQ) != 0;
20824
20825 this_mask = ((loop->flags >> OLF_DIM_BASE)
20826 & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));
20827
20828 if ((this_mask != 0) + auto_par + seq_par > 1)
20829 {
20830 if (noisy)
20831 error_at (loop->loc,
20832 seq_par
20833 ? "%<seq%> overrides other OpenACC loop specifiers"
20834 : "%<auto%> conflicts with other OpenACC loop specifiers");
20835 auto_par = false;
20836 loop->flags &= ~OLF_AUTO;
20837 if (seq_par)
20838 {
20839 loop->flags &=
20840 ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
20841 this_mask = 0;
20842 }
20843 }
20844 if (auto_par && (loop->flags & OLF_INDEPENDENT))
20845 mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
20846 }
20847
20848 if (this_mask & outer_mask)
20849 {
20850 const oacc_loop *outer;
20851 for (outer = loop->parent; outer; outer = outer->parent)
20852 if (outer->mask & this_mask)
20853 break;
20854
20855 if (noisy)
20856 {
20857 if (outer)
20858 {
20859 error_at (loop->loc,
20860 "%s uses same OpenACC parallelism as containing loop",
20861 loop->routine ? "routine call" : "inner loop");
20862 inform (outer->loc, "containing loop here");
20863 }
20864 else
20865 error_at (loop->loc,
20866 "%s uses OpenACC parallelism disallowed by containing routine",
20867 loop->routine ? "routine call" : "loop");
20868
20869 if (loop->routine)
20870 inform (DECL_SOURCE_LOCATION (loop->routine),
20871 "routine %qD declared here", loop->routine);
20872 }
20873 this_mask &= ~outer_mask;
20874 }
20875 else
20876 {
20877 unsigned outermost = this_mask & -this_mask;
20878
20879 if (outermost && outermost <= outer_mask)
20880 {
20881 if (noisy)
20882 {
20883 error_at (loop->loc,
20884 "incorrectly nested OpenACC loop parallelism");
20885
20886 const oacc_loop *outer;
20887 for (outer = loop->parent;
20888 outer->flags && outer->flags < outermost;
20889 outer = outer->parent)
20890 continue;
20891 inform (outer->loc, "containing loop here");
20892 }
20893
20894 this_mask &= ~outermost;
20895 }
20896 }
20897
20898 loop->mask = this_mask;
20899 mask_all |= this_mask;
20900
20901 if (loop->child)
20902 mask_all |= oacc_loop_fixed_partitions (loop->child,
20903 outer_mask | this_mask);
20904
20905 if (loop->sibling)
20906 mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
20907
20908 return mask_all;
20909 }
20910
20911 /* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
20912 OUTER_MASK is the partitioning this loop is contained within.
20913 Return the cumulative partitioning used by this loop, siblings and
20914 children. */
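/* A worked example of the bit manipulation below, assuming
   GOMP_DIM_GANG = 0, GOMP_DIM_WORKER = 1, GOMP_DIM_VECTOR = 2 and
   GOMP_DIM_MAX = 3 from gomp-constants.h:

     - if loops inside this 'auto' loop already use vector
       (INNER_MASK == 1 << 2 == 4), then
         this_mask = 4 | (1 << 3) = 0b1100,
         this_mask & -this_mask = 0b0100  (outermost level used inside),
         this_mask >>= 1 gives 0b0010, i.e. worker;

     - if nothing inside is partitioned (INNER_MASK == 0), only the
       GOMP_DIM_MAX sentinel bit survives and the shift yields vector,
       the innermost level.

   The final '&= ~outer_mask' then refuses a level already taken by an
   enclosing loop, which may leave no level at all, hence the warning.  */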
20915
20916 static unsigned
20917 oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
20918 {
20919 unsigned inner_mask = 0;
20920 bool noisy = true;
20921
20922 #ifdef ACCEL_COMPILER
20923   /* The device compiler stays quiet for now; once device_type is supported
20924      and loop parameters can be device_type-specific, it will need to diagnose them itself.  */
20925 noisy = false;
20926 #endif
20927
20928 if (loop->child)
20929 inner_mask |= oacc_loop_auto_partitions (loop->child,
20930 outer_mask | loop->mask);
20931
20932 if ((loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT))
20933 {
20934 unsigned this_mask = 0;
20935
20936 /* Determine the outermost partitioning used within this loop. */
20937 this_mask = inner_mask | GOMP_DIM_MASK (GOMP_DIM_MAX);
20938 this_mask = (this_mask & -this_mask);
20939
20940 /* Pick the partitioning just inside that one. */
20941 this_mask >>= 1;
20942
20943       /* And avoid picking one used by an outer loop.  */
20944 this_mask &= ~outer_mask;
20945
20946 if (!this_mask && noisy)
20947 warning_at (loop->loc, 0,
20948 "insufficient partitioning available to parallelize loop");
20949
20950 if (dump_file)
20951 fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
20952 LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
20953 this_mask);
20954
20955 loop->mask = this_mask;
20956 }
20957 inner_mask |= loop->mask;
20958
20959 if (loop->sibling)
20960 inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
20961
20962 return inner_mask;
20963 }
20964
20965 /* Walk the OpenACC loop hierarchy to check and assign partitioning
20966 axes. Return mask of partitioning. */
20967
20968 static unsigned
20969 oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
20970 {
20971 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
20972
20973 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
20974 {
20975 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
20976 mask_all |= oacc_loop_auto_partitions (loop, outer_mask);
20977 }
20978 return mask_all;
20979 }
20980
20981 /* Default fork/join early expander. Delete the function calls if
20982 there is no RTL expander. */
20983
20984 bool
20985 default_goacc_fork_join (gcall *ARG_UNUSED (call),
20986 const int *ARG_UNUSED (dims), bool is_fork)
20987 {
20988 if (is_fork)
20989 return targetm.have_oacc_fork ();
20990 else
20991 return targetm.have_oacc_join ();
20992 }
20993
20994 /* Default goacc.reduction early expander.
20995
20996 LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
20997 If RES_PTR is not integer-zerop:
20998 SETUP - emit 'LHS = *RES_PTR', LHS = NULL
20999 TEARDOWN - emit '*RES_PTR = VAR'
21000 If LHS is not NULL
21001 emit 'LHS = VAR' */
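/* As a sketch, where there is no actual parallelism on the reduction's
   axis the calls therefore degenerate into plain copies:

     SETUP,    RES_PTR != 0:   LHS = *RES_PTR
     SETUP,    RES_PTR == 0:   LHS = VAR
     INIT, FINI:               LHS = VAR
     TEARDOWN, RES_PTR != 0:   *RES_PTR = VAR;  LHS = VAR
     TEARDOWN, RES_PTR == 0:   LHS = VAR

   (the LHS assignments being emitted only when the call has an LHS).  */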
21002
21003 void
21004 default_goacc_reduction (gcall *call)
21005 {
21006 unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
21007 gimple_stmt_iterator gsi = gsi_for_stmt (call);
21008 tree lhs = gimple_call_lhs (call);
21009 tree var = gimple_call_arg (call, 2);
21010 gimple_seq seq = NULL;
21011
21012 if (code == IFN_GOACC_REDUCTION_SETUP
21013 || code == IFN_GOACC_REDUCTION_TEARDOWN)
21014 {
21015 /* Setup and Teardown need to copy from/to the receiver object,
21016 if there is one. */
21017 tree ref_to_res = gimple_call_arg (call, 1);
21018
21019 if (!integer_zerop (ref_to_res))
21020 {
21021 tree dst = build_simple_mem_ref (ref_to_res);
21022 tree src = var;
21023
21024 if (code == IFN_GOACC_REDUCTION_SETUP)
21025 {
21026 src = dst;
21027 dst = lhs;
21028 lhs = NULL;
21029 }
21030 gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
21031 }
21032 }
21033
21034 /* Copy VAR to LHS, if there is an LHS. */
21035 if (lhs)
21036 gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));
21037
21038 gsi_replace_with_seq (&gsi, seq, true);
21039 }
21040
21041 /* Main entry point for oacc transformations which run on the device
21042 compiler after LTO, so we know what the target device is at this
21043 point (including the host fallback). */
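/* Roughly, the steps below are: parse the default launch dimensions
   exactly once, discover the loop structure from the HEAD/TAIL markers,
   assign the fixed and then the 'auto' partitionings, validate the
   launch dimensions against what was used, rewrite the markers with the
   chosen levels, and finally lower or delete the IFN_GOACC_LOOP,
   IFN_GOACC_REDUCTION and IFN_UNIQUE calls.  */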
21044
21045 static unsigned int
21046 execute_oacc_device_lower ()
21047 {
21048 tree attrs = get_oacc_fn_attrib (current_function_decl);
21049
21050 if (!attrs)
21051 /* Not an offloaded function. */
21052 return 0;
21053
21054   /* Parse the default dim argument exactly once; afterwards the flag points at itself as an "already parsed" marker.  */
21055 if ((const void *)flag_openacc_dims != &flag_openacc_dims)
21056 {
21057 oacc_parse_default_dims (flag_openacc_dims);
21058 flag_openacc_dims = (char *)&flag_openacc_dims;
21059 }
21060
21061 /* Discover, partition and process the loops. */
21062 oacc_loop *loops = oacc_loop_discovery ();
21063 int fn_level = oacc_fn_attrib_level (attrs);
21064
21065 if (dump_file)
21066 fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs)
21067 ? "Function is kernels offload\n"
21068 : fn_level < 0 ? "Function is parallel offload\n"
21069 : "Function is routine level %d\n", fn_level);
21070
21071 unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
21072 unsigned used_mask = oacc_loop_partition (loops, outer_mask);
21073 int dims[GOMP_DIM_MAX];
21074
21075 oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
21076
21077 if (dump_file)
21078 {
21079 const char *comma = "Compute dimensions [";
21080 for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
21081 fprintf (dump_file, "%s%d", comma, dims[ix]);
21082 fprintf (dump_file, "]\n");
21083 }
21084
21085 oacc_loop_process (loops);
21086 if (dump_file)
21087 {
21088 fprintf (dump_file, "OpenACC loops\n");
21089 dump_oacc_loop (dump_file, loops, 0);
21090 fprintf (dump_file, "\n");
21091 }
21092
21093 /* Offloaded targets may introduce new basic blocks, which require
21094 dominance information to update SSA. */
21095 calculate_dominance_info (CDI_DOMINATORS);
21096
21097 /* Now lower internal loop functions to target-specific code
21098 sequences. */
21099 basic_block bb;
21100 FOR_ALL_BB_FN (bb, cfun)
21101 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
21102 {
21103 gimple *stmt = gsi_stmt (gsi);
21104 if (!is_gimple_call (stmt))
21105 {
21106 gsi_next (&gsi);
21107 continue;
21108 }
21109
21110 gcall *call = as_a <gcall *> (stmt);
21111 if (!gimple_call_internal_p (call))
21112 {
21113 gsi_next (&gsi);
21114 continue;
21115 }
21116
21117 /* Rewind to allow rescan. */
21118 gsi_prev (&gsi);
21119 bool rescan = false, remove = false;
21120 enum internal_fn ifn_code = gimple_call_internal_fn (call);
21121
21122 switch (ifn_code)
21123 {
21124 default: break;
21125
21126 case IFN_GOACC_LOOP:
21127 oacc_xform_loop (call);
21128 rescan = true;
21129 break;
21130
21131 case IFN_GOACC_REDUCTION:
21132 /* Mark the function for SSA renaming. */
21133 mark_virtual_operands_for_renaming (cfun);
21134
21135 /* If the level is -1, this ended up being an unused
21136 axis. Handle as a default. */
21137 if (integer_minus_onep (gimple_call_arg (call, 3)))
21138 default_goacc_reduction (call);
21139 else
21140 targetm.goacc.reduction (call);
21141 rescan = true;
21142 break;
21143
21144 case IFN_UNIQUE:
21145 {
21146 enum ifn_unique_kind kind
21147 = ((enum ifn_unique_kind)
21148 TREE_INT_CST_LOW (gimple_call_arg (call, 0)));
21149
21150 switch (kind)
21151 {
21152 default:
21153 gcc_unreachable ();
21154
21155 case IFN_UNIQUE_OACC_FORK:
21156 case IFN_UNIQUE_OACC_JOIN:
21157 if (integer_minus_onep (gimple_call_arg (call, 2)))
21158 remove = true;
21159 else if (!targetm.goacc.fork_join
21160 (call, dims, kind == IFN_UNIQUE_OACC_FORK))
21161 remove = true;
21162 break;
21163
21164 case IFN_UNIQUE_OACC_HEAD_MARK:
21165 case IFN_UNIQUE_OACC_TAIL_MARK:
21166 remove = true;
21167 break;
21168 }
21169 break;
21170 }
21171 }
21172
21173 if (gsi_end_p (gsi))
21174 /* We rewound past the beginning of the BB. */
21175 gsi = gsi_start_bb (bb);
21176 else
21177 /* Undo the rewind. */
21178 gsi_next (&gsi);
21179
21180 if (remove)
21181 {
21182 if (gimple_vdef (call))
21183 replace_uses_by (gimple_vdef (call), gimple_vuse (call));
21184 if (gimple_call_lhs (call))
21185 {
21186 /* Propagate the data dependency var. */
21187 gimple *ass = gimple_build_assign (gimple_call_lhs (call),
21188 gimple_call_arg (call, 1));
21189 gsi_replace (&gsi, ass, false);
21190 }
21191 else
21192 gsi_remove (&gsi, true);
21193 }
21194 else if (!rescan)
21195 /* If not rescanning, advance over the call. */
21196 gsi_next (&gsi);
21197 }
21198
21199 free_oacc_loop (loops);
21200
21201 return 0;
21202 }
21203
21204 /* Default launch dimension validator. Force everything to 1. A
21205 backend that wants to provide larger dimensions must override this
21206 hook. */
21207
21208 bool
21209 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
21210 int ARG_UNUSED (fn_level))
21211 {
21212 bool changed = false;
21213
21214 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
21215 {
21216 if (dims[ix] != 1)
21217 {
21218 dims[ix] = 1;
21219 changed = true;
21220 }
21221 }
21222
21223 return changed;
21224 }
21225
21226 /* Default dimension bound is unknown on accelerator and 1 on host. */
21227
21228 int
21229 default_goacc_dim_limit (int ARG_UNUSED (axis))
21230 {
21231 #ifdef ACCEL_COMPILER
21232 return 0;
21233 #else
21234 return 1;
21235 #endif
21236 }
21237
21238 namespace {
21239
21240 const pass_data pass_data_oacc_device_lower =
21241 {
21242 GIMPLE_PASS, /* type */
21243 "oaccdevlow", /* name */
21244 OPTGROUP_NONE, /* optinfo_flags */
21245 TV_NONE, /* tv_id */
21246 PROP_cfg, /* properties_required */
21247 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */
21248 0, /* properties_destroyed */
21249 0, /* todo_flags_start */
21250 TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
21251 };
21252
21253 class pass_oacc_device_lower : public gimple_opt_pass
21254 {
21255 public:
21256 pass_oacc_device_lower (gcc::context *ctxt)
21257 : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
21258 {}
21259
21260 /* opt_pass methods: */
21261 virtual unsigned int execute (function *)
21262 {
21263 bool gate = flag_openacc != 0;
21264
21265 if (!gate)
21266 return 0;
21267
21268 return execute_oacc_device_lower ();
21269 }
21270
21271 }; // class pass_oacc_device_lower
21272
21273 } // anon namespace
21274
21275 gimple_opt_pass *
21276 make_pass_oacc_device_lower (gcc::context *ctxt)
21277 {
21278 return new pass_oacc_device_lower (ctxt);
21279 }
21280
21281 /* "omp declare target link" handling pass. */
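/* A variable mentioned in an 'omp declare target link' clause is not
   statically allocated on the device; instead, references to it are
   expected to go through a pointer set up at run time.  The callback
   below looks for VAR_DECLs that carry the link attribute and a
   DECL_VALUE_EXPR, and the pass regimplifies any statement mentioning
   one so that the value expression (typically such an indirection) is
   substituted in.  */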
21282
21283 namespace {
21284
21285 const pass_data pass_data_omp_target_link =
21286 {
21287 GIMPLE_PASS, /* type */
21288 "omptargetlink", /* name */
21289 OPTGROUP_NONE, /* optinfo_flags */
21290 TV_NONE, /* tv_id */
21291 PROP_ssa, /* properties_required */
21292 0, /* properties_provided */
21293 0, /* properties_destroyed */
21294 0, /* todo_flags_start */
21295 TODO_update_ssa, /* todo_flags_finish */
21296 };
21297
21298 class pass_omp_target_link : public gimple_opt_pass
21299 {
21300 public:
21301 pass_omp_target_link (gcc::context *ctxt)
21302 : gimple_opt_pass (pass_data_omp_target_link, ctxt)
21303 {}
21304
21305 /* opt_pass methods: */
21306 virtual bool gate (function *fun)
21307 {
21308 #ifdef ACCEL_COMPILER
21309 tree attrs = DECL_ATTRIBUTES (fun->decl);
21310 return lookup_attribute ("omp declare target", attrs)
21311 || lookup_attribute ("omp target entrypoint", attrs);
21312 #else
21313 (void) fun;
21314 return false;
21315 #endif
21316 }
21317
21318 virtual unsigned execute (function *);
21319 };
21320
21321 /* Callback for walk_gimple_stmt used to scan for link var operands. */
21322
21323 static tree
21324 find_link_var_op (tree *tp, int *walk_subtrees, void *)
21325 {
21326 tree t = *tp;
21327
21328 if (TREE_CODE (t) == VAR_DECL && DECL_HAS_VALUE_EXPR_P (t)
21329 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
21330 {
21331 *walk_subtrees = 0;
21332 return t;
21333 }
21334
21335 return NULL_TREE;
21336 }
21337
21338 unsigned
21339 pass_omp_target_link::execute (function *fun)
21340 {
21341 basic_block bb;
21342 FOR_EACH_BB_FN (bb, fun)
21343 {
21344 gimple_stmt_iterator gsi;
21345 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
21346 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
21347 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
21348 }
21349
21350 return 0;
21351 }
21352
21353 } // anon namespace
21354
21355 gimple_opt_pass *
21356 make_pass_omp_target_link (gcc::context *ctxt)
21357 {
21358 return new pass_omp_target_link (ctxt);
21359 }
21360
21361 #include "gt-omp-low.h"