re PR c++/87934 (struct with NSDMI of enum makes initialization a non-constant expres...
[gcc.git] / gcc / omp-offload.c
1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
2 and a lowering pass for OpenACC device directives.
3
4 Copyright (C) 2005-2018 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "pretty-print.h"
33 #include "diagnostic-core.h"
34 #include "fold-const.h"
35 #include "internal-fn.h"
36 #include "langhooks.h"
37 #include "gimplify.h"
38 #include "gimple-iterator.h"
39 #include "gimplify-me.h"
40 #include "gimple-walk.h"
41 #include "tree-cfg.h"
42 #include "tree-into-ssa.h"
43 #include "tree-nested.h"
44 #include "stor-layout.h"
45 #include "common/common-target.h"
46 #include "omp-general.h"
47 #include "omp-offload.h"
48 #include "lto-section-names.h"
49 #include "gomp-constants.h"
50 #include "gimple-pretty-print.h"
51 #include "intl.h"
52 #include "stringpool.h"
53 #include "attribs.h"
54 #include "cfgloop.h"
55
/* Describe the OpenACC looping structure of a function.  The entire
   function is held in a 'NULL' loop.  */

struct oacc_loop
{
  oacc_loop *parent; /* Containing loop.  NULL for the outermost.  */

  oacc_loop *child; /* First inner loop.  */

  oacc_loop *sibling; /* Next loop within same parent.  */

  location_t loc; /* Location of the loop start.  */

  gcall *marker; /* Initial head marker.  */

  gcall *heads[GOMP_DIM_MAX]; /* Head marker functions.  */
  gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions.  */

  tree routine; /* Pseudo-loop enclosing a routine.  */

  unsigned mask; /* Partitioning mask.  */
  unsigned e_mask; /* Partitioning of element loops (when tiling).  */
  unsigned inner; /* Partitioning of inner loops.  */
  unsigned flags; /* Partitioning flags.  */
  vec<gcall *> ifns; /* Contained loop abstraction functions.  */
  tree chunk_size; /* Chunk size.  */
  gcall *head_end; /* Final marker of head sequence.  */
};
84
85 /* Holds offload tables with decls. */
86 vec<tree, va_gc> *offload_funcs, *offload_vars;
87
88 /* Return level at which oacc routine may spawn a partitioned loop, or
89 -1 if it is not a routine (i.e. is an offload fn). */
90
91 static int
92 oacc_fn_attrib_level (tree attr)
93 {
94 tree pos = TREE_VALUE (attr);
95
96 if (!TREE_PURPOSE (pos))
97 return -1;
98
99 int ix = 0;
100 for (ix = 0; ix != GOMP_DIM_MAX;
101 ix++, pos = TREE_CHAIN (pos))
102 if (!integer_zerop (TREE_PURPOSE (pos)))
103 break;
104
105 return ix;
106 }
107
/* Helper function for omp_finish_file routine.  Takes decls from V_DECLS and
   adds their addresses and sizes to constructor-vector V_CTOR.  Functions
   contribute a single address element; variables contribute an
   (address, size) pair.  */

static void
add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
					 vec<constructor_elt, va_gc> *v_ctor)
{
  unsigned len = vec_safe_length (v_decls);
  for (unsigned i = 0; i < len; i++)
    {
      tree it = (*v_decls)[i];
      bool is_var = VAR_P (it);
      /* On the accel compiler a link var must additionally have been
	 rewritten via DECL_VALUE_EXPR to refer through a pointer.  */
      bool is_link_var
	= is_var
#ifdef ACCEL_COMPILER
	  && DECL_HAS_VALUE_EXPR_P (it)
#endif
	  && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));

      tree size = NULL_TREE;
      if (is_var)
	size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));

      tree addr;
      if (!is_link_var)
	addr = build_fold_addr_expr (it);
      else
	{
#ifdef ACCEL_COMPILER
	  /* For "omp declare target link" vars add address of the pointer to
	     the target table, instead of address of the var.  */
	  tree value_expr = DECL_VALUE_EXPR (it);
	  tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
	  varpool_node::finalize_decl (link_ptr_decl);
	  addr = build_fold_addr_expr (link_ptr_decl);
#else
	  addr = build_fold_addr_expr (it);
#endif

	  /* Most significant bit of the size marks "omp declare target link"
	     vars in host and target tables.  */
	  unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
	  isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
			    * BITS_PER_UNIT - 1);
	  size = wide_int_to_tree (const_ptr_type_node, isize);
	}

      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
      if (is_var)
	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
    }
}
160
/* Create new symbols containing (address, size) pairs for global variables,
   marked with "omp declare target" attribute, as well as addresses for the
   functions, which are outlined offloading regions.  On targets without
   named-section support, fall back to the target hook
   record_offload_symbol instead.  */
void
omp_finish_file (void)
{
  unsigned num_funcs = vec_safe_length (offload_funcs);
  unsigned num_vars = vec_safe_length (offload_vars);

  if (num_funcs == 0 && num_vars == 0)
    return;

  if (targetm_common.have_named_sections)
    {
      vec<constructor_elt, va_gc> *v_f, *v_v;
      vec_alloc (v_f, num_funcs);
      /* Each variable contributes two elements: address and size.  */
      vec_alloc (v_v, num_vars * 2);

      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
      add_decls_addresses_to_decl_constructor (offload_vars, v_v);

      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
						    num_vars * 2);
      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
						     num_funcs);
      SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      tree ctor_v = build_constructor (vars_decl_type, v_v);
      tree ctor_f = build_constructor (funcs_decl_type, v_f);
      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				    get_identifier (".offload_func_table"),
				    funcs_decl_type);
      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				   get_identifier (".offload_var_table"),
				   vars_decl_type);
      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
	 otherwise a joint table in a binary will contain padding between
	 tables from multiple object files.  */
      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
      SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
      SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
      DECL_INITIAL (funcs_decl) = ctor_f;
      DECL_INITIAL (vars_decl) = ctor_v;
      set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
      set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);

      varpool_node::finalize_decl (vars_decl);
      varpool_node::finalize_decl (funcs_decl);
    }
  else
    {
      /* No named sections: let the target record each symbol itself.  */
      for (unsigned i = 0; i < num_funcs; i++)
	{
	  tree it = (*offload_funcs)[i];
	  targetm.record_offload_symbol (it);
	}
      for (unsigned i = 0; i < num_vars; i++)
	{
	  tree it = (*offload_vars)[i];
	  targetm.record_offload_symbol (it);
	}
    }
}
227
228 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
229 axis DIM. Return a tmp var holding the result. */
230
231 static tree
232 oacc_dim_call (bool pos, int dim, gimple_seq *seq)
233 {
234 tree arg = build_int_cst (unsigned_type_node, dim);
235 tree size = create_tmp_var (integer_type_node);
236 enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
237 gimple *call = gimple_build_call_internal (fn, 1, arg);
238
239 gimple_call_set_lhs (call, size);
240 gimple_seq_add_stmt (seq, call);
241
242 return size;
243 }
244
/* Find the number of threads (POS = false), or thread number (POS =
   true) for an OpenACC region partitioned as MASK.  Setup code
   required for the calculation is added to SEQ.  The returned tree is
   either a constant or the result of folding the per-dimension
   size/position builtin calls together.  */

static tree
oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
{
  /* Thread count starts from the multiplicative identity; a thread
     number is built up additively, so starts empty.  */
  tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
  unsigned ix;

  /* Start at gang level, and examine relevant dimension indices.  */
  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (GOMP_DIM_MASK (ix) & mask)
      {
	if (res)
	  {
	    /* We had an outer index, so scale that by the size of
	       this dimension.  */
	    tree n = oacc_dim_call (false, ix, seq);
	    res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
	  }
	if (pos)
	  {
	    /* Determine index in this dimension.  */
	    tree id = oacc_dim_call (true, ix, seq);
	    if (res)
	      res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
	    else
	      res = id;
	  }
      }

  /* An empty MASK with POS means thread number zero.  */
  if (res == NULL_TREE)
    res = integer_zero_node;

  return res;
}
282
/* Transform IFN_GOACC_LOOP calls to actual code.  See
   expand_oacc_for for where these are generated.  At the vector
   level, we stride loops, such that each member of a warp will
   operate on adjacent iterations.  At the worker and gang level,
   each gang/warp executes a set of contiguous iterations.  Chunking
   can override this such that each iteration engine executes a
   contiguous chunk, and then moves on to stride to the next chunk.  */

static void
oacc_xform_loop (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  enum ifn_goacc_loop_kind code
    = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  tree dir = gimple_call_arg (call, 1);
  tree range = gimple_call_arg (call, 2);
  tree step = gimple_call_arg (call, 3);
  tree chunk_size = NULL_TREE;
  unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  tree diff_type = TREE_TYPE (range);
  tree r = NULL_TREE;
  gimple_seq seq = NULL;
  bool chunking = false, striding = true;
  unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
  unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)

#ifdef ACCEL_COMPILER
  /* Only the accel compiler knows the real chunking policy; the host
     compiler keeps the defaults (striding, no chunking).  */
  chunk_size = gimple_call_arg (call, 4);
  if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
      || integer_zerop (chunk_size))   /* Default (also static).  */
    {
      /* If we're at the gang level, we want each to execute a
	 contiguous run of iterations.  Otherwise we want each element
	 to stride.  */
      striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
      chunking = false;
    }
  else
    {
      /* Chunk of size 1 is striding.  */
      striding = integer_onep (chunk_size);
      chunking = !striding;
    }
#endif

  /* striding=true, chunking=true
       -> invalid.
     striding=true, chunking=false
       -> chunks=1
     striding=false,chunking=true
       -> chunks=ceil (range/(chunksize*threads*step))
     striding=false,chunking=false
       -> chunk_size=ceil(range/(threads*step)),chunks=1  */
  push_gimplify_context (true);

  switch (code)
    {
    default: gcc_unreachable ();

    case IFN_GOACC_LOOP_CHUNKS:
      if (!chunking)
	r = build_int_cst (type, 1);
      else
	{
	  /* chunk_max
	     = (range - dir) / (chunks * step * num_threads) + dir  */
	  tree per = oacc_thread_numbers (false, mask, &seq);
	  per = fold_convert (type, per);
	  chunk_size = fold_convert (type, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, step);
	  r = build2 (MINUS_EXPR, type, range, dir);
	  r = build2 (PLUS_EXPR, type, r, per);
	  r = build2 (TRUNC_DIV_EXPR, type, r, per);
	}
      break;

    case IFN_GOACC_LOOP_STEP:
      {
	/* If striding, step by the entire compute volume, otherwise
	   step by the inner volume.  */
	unsigned volume = striding ? mask : inner_mask;

	r = oacc_thread_numbers (false, volume, &seq);
	r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
      }
      break;

    case IFN_GOACC_LOOP_OFFSET:
      /* Enable vectorization on non-SIMT targets.  */
      if (!targetm.simt.vf
	  && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
	  /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	     the loop.  */
	  && (flag_tree_loop_vectorize
	      || !global_options_set.x_flag_tree_loop_vectorize))
	{
	  basic_block bb = gsi_bb (gsi);
	  struct loop *parent = bb->loop_father;
	  struct loop *body = parent->inner;

	  parent->force_vectorize = true;
	  parent->safelen = INT_MAX;

	  /* "Chunking loops" may have inner loops.  */
	  if (parent->inner)
	    {
	      body->force_vectorize = true;
	      body->safelen = INT_MAX;
	    }

	  cfun->has_force_vectorize_loops = true;
	}
      if (striding)
	{
	  r = oacc_thread_numbers (true, mask, &seq);
	  r = fold_convert (diff_type, r);
	}
      else
	{
	  /* Offset within the compute volume: outer thread number
	     scaled by the chunk span, plus the inner thread number,
	     plus (when chunking) the chunk offset.  */
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      /* chunk_size = ceil (range / (threads * step)).  */
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));
	  r = oacc_thread_numbers (true, outer_mask, &seq);
	  r = fold_convert (diff_type, r);
	  r = build2 (MULT_EXPR, diff_type, r, span);

	  tree inner = oacc_thread_numbers (true, inner_mask, &seq);
	  inner = fold_convert (diff_type, inner);
	  r = fold_build2 (PLUS_EXPR, diff_type, r, inner);

	  if (chunking)
	    {
	      tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
	      tree per
		= fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
	      per = build2 (MULT_EXPR, diff_type, per, chunk);

	      r = build2 (PLUS_EXPR, diff_type, r, per);
	    }
	}
      r = fold_build2 (MULT_EXPR, diff_type, r, step);
      if (type != diff_type)
	r = fold_convert (type, r);
      break;

    case IFN_GOACC_LOOP_BOUND:
      if (striding)
	r = range;
      else
	{
	  /* Same span computation as for IFN_GOACC_LOOP_OFFSET, then
	     clamp offset + span to the loop range.  */
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));

	  r = fold_build2 (MULT_EXPR, diff_type, span, step);

	  tree offset = gimple_call_arg (call, 6);
	  r = build2 (PLUS_EXPR, diff_type, r,
		      fold_convert (diff_type, offset));
	  /* MIN for an upward loop, MAX for a downward one.  */
	  r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
		      diff_type, r, range);
	}
      if (diff_type != type)
	r = fold_convert (type, r);
      break;
    }

  gimplify_assign (lhs, r, &seq);

  pop_gimplify_context (NULL);

  /* Replace the internal-fn call with the computed sequence.  */
  gsi_replace_with_seq (&gsi, seq, true);
}
491
/* Transform a GOACC_TILE call.  Determines the element loop span for
   the specified loop of the nest.  This is 1 if we're not tiling.

   GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element);  */

static void
oacc_xform_tile (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
  /* Inner loops have higher loop_nos.  */
  unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
  tree tile_size = gimple_call_arg (call, 2);
  unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  gimple_seq seq = NULL;
  tree span = build_int_cst (type, 1);

  /* Element loops may only be vector and/or worker partitioned.  */
  gcc_assert (!(e_mask
		& ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
		    | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
  push_gimplify_context (!seen_error ());

#ifndef ACCEL_COMPILER
  /* Partitioning disabled on host compilers.  */
  e_mask = 0;
#endif
  if (!e_mask)
    /* Not partitioning.  */
    span = integer_one_node;
  else if (!integer_zerop (tile_size))
    /* User explicitly specified size.  */
    span = tile_size;
  else
    {
      /* Pick a size based on the partitioning of the element loop and
	 the number of loop nests.  */
      tree first_size = NULL_TREE;
      tree second_size = NULL_TREE;

      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
	first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
	second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);

      /* Normalize so FIRST_SIZE is always set if anything is.  */
      if (!first_size)
	{
	  first_size = second_size;
	  second_size = NULL_TREE;
	}

      if (loop_no + 1 == collapse)
	{
	  span = first_size;
	  if (!loop_no && second_size)
	    span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
				span, second_size);
	}
      else if (loop_no + 2 == collapse)
	span = second_size;
      else
	span = NULL_TREE;

      if (!span)
	/* There's no obvious element size for this loop.  Options
	   are 1, first_size or some non-unity constant (32 is my
	   favourite).  We should gather some statistics.  */
	span = first_size;
    }

  span = fold_convert (type, span);
  gimplify_assign (lhs, span, &seq);

  pop_gimplify_context (NULL);

  gsi_replace_with_seq (&gsi, seq, true);
}
570
/* Default partitioned and minimum partitioned dimensions.  Both are
   initialized by oacc_parse_default_dims; a default of -1 means no
   explicit default was given for that dimension.  */

static int oacc_default_dims[GOMP_DIM_MAX];
static int oacc_min_dims[GOMP_DIM_MAX];
575
/* Parse the default dimension parameter.  This is a set of
   :-separated optional compute dimensions.  Each specified dimension
   is a positive integer.  When device type support is added, it is
   planned to be a comma separated list of such compute dimensions,
   with all but the first prefixed by the colon-terminated device
   type.  */

static void
oacc_parse_default_dims (const char *dims)
{
  int ix;

  /* Reset: no explicit default, minimum of 1, for every axis.  */
  for (ix = GOMP_DIM_MAX; ix--;)
    {
      oacc_default_dims[ix] = -1;
      oacc_min_dims[ix] = 1;
    }

#ifndef ACCEL_COMPILER
  /* Cannot be overridden on the host.  */
  dims = NULL;
#endif
  if (dims)
    {
      const char *pos = dims;

      for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
	{
	  if (ix)
	    {
	      /* Subsequent dimensions are separated by ':'.  */
	      if (*pos != ':')
		goto malformed;
	      pos++;
	    }

	  /* An empty field (next char is ':') leaves the default.  */
	  if (*pos != ':')
	    {
	      long val;
	      const char *eptr;

	      errno = 0;
	      val = strtol (pos, CONST_CAST (char **, &eptr), 10);
	      /* Reject parse errors, non-positive values, and values
		 that do not fit in an int.  */
	      if (errno || val <= 0 || (int) val != val)
		goto malformed;
	      pos = eptr;
	      oacc_default_dims[ix] = (int) val;
	    }
	}
      if (*pos)
	{
	malformed:
	  error_at (UNKNOWN_LOCATION,
		    "-fopenacc-dim operand is malformed at '%s'", pos);
	}
    }

  /* Allow the backend to validate the dimensions.  */
  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
}
636
/* Validate and update the dimensions for offloaded FN.  ATTRS is the
   raw attribute.  DIMS is an array of dimensions, which is filled in.
   LEVEL is the partitioning level of a routine, or -1 for an offload
   region itself.  USED is the mask of partitioned execution in the
   function.  If anything changed, the function attribute is rewritten
   with the final values.  */

static void
oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
{
  tree purpose[GOMP_DIM_MAX];
  unsigned ix;
  tree pos = TREE_VALUE (attrs);

  /* Make sure the attribute creator attached the dimension
     information.  */
  gcc_assert (pos);

  /* Extract the per-dimension (purpose, value) pairs; a NULL value
     means the dimension is as-yet unspecified (-1).  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    {
      purpose[ix] = TREE_PURPOSE (pos);
      tree val = TREE_VALUE (pos);
      dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
      pos = TREE_CHAIN (pos);
    }

  bool changed = targetm.goacc.validate_dims (fn, dims, level);

  /* Default anything left to 1 or a partitioned default.  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    if (dims[ix] < 0)
      {
	/* The OpenACC spec says 'If the [num_gangs] clause is not
	   specified, an implementation-defined default will be used;
	   the default may depend on the code within the construct.'
	   (2.5.6).  Thus an implementation is free to choose
	   non-unity default for a parallel region that doesn't have
	   any gang-partitioned loops.  However, it appears that there
	   is a sufficient body of user code that expects non-gang
	   partitioned regions to not execute in gang-redundant mode.
	   So we (a) don't warn about the non-portability and (b) pick
	   the minimum permissible dimension size when there is no
	   partitioned execution.  Otherwise we pick the global
	   default for the dimension, which the user can control.  The
	   same wording and logic applies to num_workers and
	   vector_length, however the worker- or vector- single
	   execution doesn't have the same impact as gang-redundant
	   execution.  (If the minimum gang-level partioning is not 1,
	   the target is probably too confusing.)  */
	dims[ix] = (used & GOMP_DIM_MASK (ix)
		    ? oacc_default_dims[ix] : oacc_min_dims[ix]);
	changed = true;
      }

  if (changed)
    {
      /* Replace the attribute with new values.  */
      pos = NULL_TREE;
      for (ix = GOMP_DIM_MAX; ix--;)
	pos = tree_cons (purpose[ix],
			 build_int_cst (integer_type_node, dims[ix]), pos);
      oacc_replace_fn_attrib (fn, pos);
    }
}
700
701 /* Create an empty OpenACC loop structure at LOC. */
702
703 static oacc_loop *
704 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
705 {
706 oacc_loop *loop = XCNEW (oacc_loop);
707
708 loop->parent = parent;
709
710 if (parent)
711 {
712 loop->sibling = parent->child;
713 parent->child = loop;
714 }
715
716 loop->loc = loc;
717 return loop;
718 }
719
/* Create an outermost, dummy OpenACC loop for offloaded function
   DECL.  It has no parent and is located at DECL's source
   location.  */

static oacc_loop *
new_oacc_loop_outer (tree decl)
{
  return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
}
728
/* Start a new OpenACC loop structure beginning at head marker HEAD.
   Link into PARENT loop.  Return the new loop.  Partitioning flags
   come from the marker's argument 3; a static chunk size, when
   requested via OLF_GANG_STATIC, from argument 4.  */

static oacc_loop *
new_oacc_loop (oacc_loop *parent, gcall *marker)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));

  loop->marker = marker;

  /* TODO: This is where device_type flattening would occur for the loop
     flags.  */

  loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));

  tree chunk_size = integer_zero_node;
  if (loop->flags & OLF_GANG_STATIC)
    chunk_size = gimple_call_arg (marker, 4);
  loop->chunk_size = chunk_size;

  return loop;
}
751
/* Create a dummy loop encompassing a call to an OpenACC routine.
   Extract the routine's partitioning requirements.  The routine's
   mask covers every dimension from its attribute level outwards.  */

static void
new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
  int level = oacc_fn_attrib_level (attrs);

  /* The caller checked this is a routine, not an offload region.  */
  gcc_assert (level >= 0);

  loop->marker = call;
  loop->routine = decl;
  loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
		^ (GOMP_DIM_MASK (level) - 1));
}
768
769 /* Finish off the current OpenACC loop ending at tail marker TAIL.
770 Return the parent loop. */
771
772 static oacc_loop *
773 finish_oacc_loop (oacc_loop *loop)
774 {
775 /* If the loop has been collapsed, don't partition it. */
776 if (loop->ifns.is_empty ())
777 loop->mask = loop->flags = 0;
778 return loop->parent;
779 }
780
781 /* Free all OpenACC loop structures within LOOP (inclusive). */
782
783 static void
784 free_oacc_loop (oacc_loop *loop)
785 {
786 if (loop->sibling)
787 free_oacc_loop (loop->sibling);
788 if (loop->child)
789 free_oacc_loop (loop->child);
790
791 loop->ifns.release ();
792 free (loop);
793 }
794
/* Dump out the OpenACC loop head or tail beginning at FROM.  Prints
   statements until reaching another marker of the same kind as FROM,
   following single-successor edges across basic-block boundaries.  */

static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
		     const char *title, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));

  fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind) TREE_INT_CST_LOW
	       (gimple_call_arg (stmt, 0)));

	  /* Another marker of the same kind terminates the part.  */
	  if (k == kind && stmt != from)
	    break;
	}
      print_gimple_stmt (file, stmt, depth * 2 + 2);

      gsi_next (&gsi);
      /* Fell off the end of the block: continue into the (single)
	 successor.  */
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
825
/* Dump OpenACC loop LOOP, its children, and its siblings.  */

static void
dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
{
  int ix;

  fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
	   loop->flags, loop->mask,
	   LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));

  if (loop->marker)
    print_gimple_stmt (file, loop->marker, depth * 2);

  if (loop->routine)
    fprintf (file, "%*sRoutine %s:%u:%s\n",
	     depth * 2, "", DECL_SOURCE_FILE (loop->routine),
	     DECL_SOURCE_LINE (loop->routine),
	     IDENTIFIER_POINTER (DECL_NAME (loop->routine)));

  /* Heads outermost-first, tails innermost-first.  */
  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (loop->heads[ix])
      dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
  for (ix = GOMP_DIM_MAX; ix--;)
    if (loop->tails[ix])
      dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);

  if (loop->child)
    dump_oacc_loop (file, loop->child, depth + 1);
  if (loop->sibling)
    dump_oacc_loop (file, loop->sibling, depth);
}
858
void debug_oacc_loop (oacc_loop *);

/* Dump loops to stderr.  Callable from the debugger.  */

DEBUG_FUNCTION void
debug_oacc_loop (oacc_loop *loop)
{
  dump_oacc_loop (stderr, loop, 0);
}
868
/* Provide diagnostics on OpenACC loop LOOP, its children, and its
   siblings.  Reports the gang/worker/vector (or seq) parallelism
   assigned to each loop via the optimization-record stream.  */

static void
inform_oacc_loop (const oacc_loop *loop)
{
  const char *gang
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
  const char *worker
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
  const char *vector
    = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
  const char *seq = loop->mask == 0 ? " seq" : "";
  const dump_user_location_t loc
    = dump_user_location_t::from_location_t (loop->loc);
  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
		   "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
		   vector, seq);

  if (loop->child)
    inform_oacc_loop (loop->child);
  if (loop->sibling)
    inform_oacc_loop (loop->sibling);
}
893
/* DFS walk of basic blocks BB onwards, creating OpenACC loop
   structures as we go.  By construction these loops are properly
   nested.  */

static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
  /* MARKER counts head/tail markers seen in the current sequence;
     REMAINING counts those still expected.  */
  int marker = 0;
  int remaining = 0;

  if (bb->flags & BB_VISITED)
    return;

 follow:
  bb->flags |= BB_VISITED;

  /* Scan for loop markers.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (!is_gimple_call (stmt))
	continue;

      gcall *call = as_a <gcall *> (stmt);

      /* If this is a routine, make a dummy loop for it.  */
      if (tree decl = gimple_call_fndecl (call))
	if (tree attrs = oacc_get_fn_attrib (decl))
	  {
	    gcc_assert (!marker);
	    new_oacc_loop_routine (loop, call, decl, attrs);
	  }

      if (!gimple_call_internal_p (call))
	continue;

      switch (gimple_call_internal_fn (call))
	{
	default:
	  break;

	case IFN_GOACC_LOOP:
	case IFN_GOACC_TILE:
	  /* Record the abstraction function, so we can manipulate it
	     later.  */
	  loop->ifns.safe_push (call);
	  break;

	case IFN_UNIQUE:
	  enum ifn_unique_kind kind
	    = (enum ifn_unique_kind) (TREE_INT_CST_LOW
				      (gimple_call_arg (call, 0)));
	  if (kind == IFN_UNIQUE_OACC_HEAD_MARK
	      || kind == IFN_UNIQUE_OACC_TAIL_MARK)
	    {
	      /* A two-argument marker ends a head/tail sequence; one
		 with more arguments carries the remaining count in
		 argument 2.  */
	      if (gimple_call_num_args (call) == 2)
		{
		  gcc_assert (marker && !remaining);
		  marker = 0;
		  if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
		    loop = finish_oacc_loop (loop);
		  else
		    loop->head_end = call;
		}
	      else
		{
		  int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));

		  if (!marker)
		    {
		      /* First marker of a sequence: a head marker
			 opens a new loop.  */
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop = new_oacc_loop (loop, call);
		      remaining = count;
		    }
		  gcc_assert (count == remaining);
		  if (remaining)
		    {
		      remaining--;
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop->heads[marker] = call;
		      else
			loop->tails[remaining] = call;
		    }
		  marker++;
		}
	    }
	}
    }
  /* In the middle of a marker sequence: it must continue in the
     (single) fall-through successor block.  */
  if (remaining || marker)
    {
      bb = single_succ (bb);
      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
      goto follow;
    }

  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    oacc_loop_discover_walk (loop, e->dest);
}
998
999 /* LOOP is the first sibling. Reverse the order in place and return
1000 the new first sibling. Recurse to child loops. */
1001
1002 static oacc_loop *
1003 oacc_loop_sibling_nreverse (oacc_loop *loop)
1004 {
1005 oacc_loop *last = NULL;
1006 do
1007 {
1008 if (loop->child)
1009 loop->child = oacc_loop_sibling_nreverse (loop->child);
1010
1011 oacc_loop *next = loop->sibling;
1012 loop->sibling = last;
1013 last = loop;
1014 loop = next;
1015 }
1016 while (loop);
1017
1018 return last;
1019 }
1020
/* Discover the OpenACC loops marked up by HEAD and TAIL markers for
   the current function.  Returns the outermost (dummy) loop holding
   the whole function.  */

static oacc_loop *
oacc_loop_discovery ()
{
  /* Clear basic block flags, in particular BB_VISITED which we're going to use
     in the following.  */
  clear_bb_flags ();

  oacc_loop *top = new_oacc_loop_outer (current_function_decl);
  oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));

  /* The siblings were constructed in reverse order, reverse them so
     that diagnostics come out in an unsurprising order.  */
  top = oacc_loop_sibling_nreverse (top);

  return top;
}
1040
/* Transform the abstract internal function markers starting at FROM
   to be for partitioning level LEVEL.  Stop when we meet another HEAD
   or TAIL marker.  Follows single-successor edges across basic-block
   boundaries, like dump_oacc_loop_part.  */

static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
  tree replacement = build_int_cst (unsigned_type_node, level);

  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind)
	       TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

	  /* Rewrite the level argument of fork/join markers; stop at
	     the next marker of FROM's kind.  */
	  if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
	    *gimple_call_arg_ptr (stmt, 2) = replacement;
	  else if (k == kind && stmt != from)
	    break;
	}
      else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
	*gimple_call_arg_ptr (stmt, 3) = replacement;

      gsi_next (&gsi);
      /* Fell off the end of the block: continue into the (single)
	 successor.  */
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
1075
/* Process the discovered OpenACC loops, setting the correct
   partitioning level etc.  Children are processed before the loop
   itself, then siblings.  */

static void
oacc_loop_process (oacc_loop *loop)
{
  if (loop->child)
    oacc_loop_process (loop->child);

  /* Only loops that were assigned some partitioning (and are not
     routine calls) need their IFN calls rewritten.  */
  if (loop->mask && !loop->routine)
    {
      int ix;
      tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
      tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
      tree chunk_arg = loop->chunk_size;
      gcall *call;

      /* Install the computed masks (and chunk size) into the loop's
	 recorded IFN_GOACC_LOOP / IFN_GOACC_TILE calls.  */
      for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
	switch (gimple_call_internal_fn (call))
	  {
	  case IFN_GOACC_LOOP:
	    {
	      /* Arg 5 of -1 marks a call belonging to the element
		 loop of a tiled construct.  */
	      bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
	      gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
	      if (!is_e)
		gimple_call_set_arg (call, 4, chunk_arg);
	    }
	    break;

	  case IFN_GOACC_TILE:
	    gimple_call_set_arg (call, 3, mask_arg);
	    gimple_call_set_arg (call, 4, e_mask_arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }

      /* Rewrite the head/tail marker sequences, one axis at a time,
	 outermost (gang) first.  */
      unsigned dim = GOMP_DIM_GANG;
      unsigned mask = loop->mask | loop->e_mask;
      for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
	{
	  while (!(GOMP_DIM_MASK (dim) & mask))
	    dim++;

	  oacc_loop_xform_head_tail (loop->heads[ix], dim);
	  oacc_loop_xform_head_tail (loop->tails[ix], dim);

	  mask ^= GOMP_DIM_MASK (dim);
	}
    }

  if (loop->sibling)
    oacc_loop_process (loop->sibling);
}
1131
/* Walk the OpenACC loop hierarchy checking and assigning the
   programmer-specified partitionings.  OUTER_MASK is the partitioning
   this loop is contained within.  Return mask of partitioning
   encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
   bit.  */

static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
  unsigned this_mask = loop->mask;
  unsigned mask_all = 0;
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (!loop->routine)
    {
      bool auto_par = (loop->flags & OLF_AUTO) != 0;
      bool seq_par = (loop->flags & OLF_SEQ) != 0;
      bool tiling = (loop->flags & OLF_TILE) != 0;

      /* Extract the explicitly requested gang/worker/vector axes from
	 the loop flags.  */
      this_mask = ((loop->flags >> OLF_DIM_BASE)
		   & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));

      /* Apply auto partitioning if this is a non-partitioned regular
	 loop, or (no more than) single axis tiled loop.  */
      bool maybe_auto
	= !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);

      /* At most one of explicit partitioning, 'auto' and 'seq' may be
	 given on a loop.  */
      if ((this_mask != 0) + auto_par + seq_par > 1)
	{
	  if (noisy)
	    error_at (loop->loc,
		      seq_par
		      ? G_("%<seq%> overrides other OpenACC loop specifiers")
		      : G_("%<auto%> conflicts with other OpenACC loop "
			   "specifiers"));
	  maybe_auto = false;
	  loop->flags &= ~OLF_AUTO;
	  if (seq_par)
	    {
	      loop->flags
		&= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
	      this_mask = 0;
	    }
	}

      if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
	{
	  /* Defer axis selection to the auto-partitioning walk; flag
	     that it is needed via the GOMP_DIM_MAX bit.  */
	  loop->flags |= OLF_AUTO;
	  mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
	}
    }

  if (this_mask & outer_mask)
    {
      /* This loop (or routine call) reuses parallelism already
	 claimed by some containing construct; find it for the
	 diagnostic.  */
      const oacc_loop *outer;
      for (outer = loop->parent; outer; outer = outer->parent)
	if ((outer->mask | outer->e_mask) & this_mask)
	  break;

      if (noisy)
	{
	  if (outer)
	    {
	      error_at (loop->loc,
			loop->routine
			? G_("routine call uses same OpenACC parallelism"
			     " as containing loop")
			: G_("inner loop uses same OpenACC parallelism"
			     " as containing loop"));
	      inform (outer->loc, "containing loop here");
	    }
	  else
	    error_at (loop->loc,
		      loop->routine
		      ? G_("routine call uses OpenACC parallelism disallowed"
			   " by containing routine")
		      : G_("loop uses OpenACC parallelism disallowed"
			   " by containing routine"));

	  if (loop->routine)
	    inform (DECL_SOURCE_LOCATION (loop->routine),
		    "routine %qD declared here", loop->routine);
	}
      this_mask &= ~outer_mask;
    }
  else
    {
      unsigned outermost = least_bit_hwi (this_mask);

      /* Partitioning must nest gang > worker > vector; reject an axis
	 at or outside one already in use.  */
      if (outermost && outermost <= outer_mask)
	{
	  if (noisy)
	    {
	      error_at (loop->loc,
			"incorrectly nested OpenACC loop parallelism");

	      const oacc_loop *outer;
	      for (outer = loop->parent;
		   outer->flags && outer->flags < outermost;
		   outer = outer->parent)
		continue;
	      inform (outer->loc, "containing loop here");
	    }

	  this_mask &= ~outermost;
	}
    }

  mask_all |= this_mask;

  if (loop->flags & OLF_TILE)
    {
      /* When tiling, vector goes to the element loop, and failing
	 that we put worker there.  The std doesn't contemplate
	 specifying all three.  We choose to put worker and vector on
	 the element loops in that case.  */
      unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
      if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
	this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);

      loop->e_mask = this_e_mask;
      this_mask ^= this_e_mask;
    }

  loop->mask = this_mask;

  if (dump_file)
    fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  if (loop->child)
    {
      /* Children may not reuse any axis taken by this loop or its
	 element loop.  */
      unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
      loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
      mask_all |= loop->inner;
    }

  if (loop->sibling)
    mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);

  return mask_all;
}
1281
/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */

static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
			   bool outer_assign)
{
  /* This loop wants auto-partitioning iff both AUTO and INDEPENDENT
     were set by the fixed-partition walk.  */
  bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
  bool noisy = true;
  bool tiling = loop->flags & OLF_TILE;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (assign && (!outer_assign || loop->inner))
    {
      /* Allocate outermost and non-innermost loops at the outermost
	 non-innermost available level.  */
      unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);

      /* Find the first outermost available partition.  */
      while (this_mask <= outer_mask)
	this_mask <<= 1;

      /* Grab two axes if tiling, and we've not assigned anything.  */
      if (tiling && !(loop->mask | loop->e_mask))
	this_mask |= this_mask << 1;

      /* Prohibit the innermost partitioning at the moment.  */
      this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;

      /* Don't use any dimension explicitly claimed by an inner loop.  */
      this_mask &= ~loop->inner;

      if (tiling && !loop->e_mask)
	{
	  /* If we got two axes, allocate the inner one to the element
	     loop.  */
	  loop->e_mask = this_mask & (this_mask << 1);
	  this_mask ^= loop->e_mask;
	}

      loop->mask |= this_mask;
    }

  if (loop->child)
    {
      unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
      loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
					       outer_assign | assign);
    }

  if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
    {
      /* Allocate the loop at the innermost available level.  Note
	 that we do this even if we already assigned this loop the
	 outermost available level above.  That way we'll partition
	 this along 2 axes, if they are available.  */
      unsigned this_mask = 0;

      /* Determine the outermost partitioning used within this loop.  */
      this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
      this_mask = least_bit_hwi (this_mask);

      /* Pick the partitioning just inside that one.  */
      this_mask >>= 1;

      /* And avoid picking one used by an outer loop.  */
      this_mask &= ~outer_mask;

      /* If tiling and we failed completely above, grab the next one
	 too.  Making sure it doesn't hit an outer loop.  */
      if (tiling)
	{
	  this_mask &= ~(loop->e_mask | loop->mask);
	  unsigned tile_mask = ((this_mask >> 1)
				& ~(outer_mask | loop->e_mask | loop->mask));

	  if (tile_mask || loop->mask)
	    {
	      loop->e_mask |= this_mask;
	      this_mask = tile_mask;
	    }
	  if (!loop->e_mask && noisy)
	    warning_at (loop->loc, 0,
			"insufficient partitioning available"
			" to parallelize element loop");
	}

      loop->mask |= this_mask;
      if (!loop->mask && noisy)
	warning_at (loop->loc, 0,
		    tiling
		    ? G_("insufficient partitioning available"
			 " to parallelize tile loop")
		    : G_("insufficient partitioning available"
			 " to parallelize loop"));
    }

  if (assign && dump_file)
    fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  unsigned inner_mask = 0;

  if (loop->sibling)
    inner_mask |= oacc_loop_auto_partitions (loop->sibling,
					     outer_mask, outer_assign);

  inner_mask |= loop->inner | loop->mask | loop->e_mask;

  return inner_mask;
}
1402
1403 /* Walk the OpenACC loop heirarchy to check and assign partitioning
1404 axes. Return mask of partitioning. */
1405
1406 static unsigned
1407 oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
1408 {
1409 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
1410
1411 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
1412 {
1413 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
1414 mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
1415 }
1416 return mask_all;
1417 }
1418
1419 /* Default fork/join early expander. Delete the function calls if
1420 there is no RTL expander. */
1421
1422 bool
1423 default_goacc_fork_join (gcall *ARG_UNUSED (call),
1424 const int *ARG_UNUSED (dims), bool is_fork)
1425 {
1426 if (is_fork)
1427 return targetm.have_oacc_fork ();
1428 else
1429 return targetm.have_oacc_join ();
1430 }
1431
/* Default goacc.reduction early expander.

   LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
   If RES_PTR is not integer-zerop:
       SETUP - emit 'LHS = *RES_PTR', LHS = NULL
       TEARDOWN - emit '*RES_PTR = VAR'
   If LHS is not NULL
       emit 'LHS = VAR'   */

void
default_goacc_reduction (gcall *call)
{
  unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  gimple_seq seq = NULL;

  if (code == IFN_GOACC_REDUCTION_SETUP
      || code == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
	 if there is one.  */
      tree ref_to_res = gimple_call_arg (call, 1);

      if (!integer_zerop (ref_to_res))
	{
	  tree dst = build_simple_mem_ref (ref_to_res);
	  tree src = var;

	  if (code == IFN_GOACC_REDUCTION_SETUP)
	    {
	      /* Setup copies in the opposite direction, and the
		 incoming value feeds the call's LHS rather than being
		 copied again below.  */
	      src = dst;
	      dst = lhs;
	      lhs = NULL;
	    }
	  gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
	}
    }

  /* Copy VAR to LHS, if there is an LHS.  */
  if (lhs)
    gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));

  /* Replace the IFN call with the generated (possibly empty)
     assignment sequence.  */
  gsi_replace_with_seq (&gsi, seq, true);
}
1478
/* Main entry point for oacc transformations which run on the device
   compiler after LTO, so we know what the target device is at this
   point (including the host fallback).  Returns 0 (no special
   TODO flags beyond the pass's own).  */

static unsigned int
execute_oacc_device_lower ()
{
  tree attrs = oacc_get_fn_attrib (current_function_decl);

  if (!attrs)
    /* Not an offloaded function.  */
    return 0;

  /* Parse the default dim argument exactly once.  The parsed state is
     recorded by pointing flag_openacc_dims at itself.  */
  if ((const void *)flag_openacc_dims != &flag_openacc_dims)
    {
      oacc_parse_default_dims (flag_openacc_dims);
      flag_openacc_dims = (char *)&flag_openacc_dims;
    }

  bool is_oacc_kernels
    = (lookup_attribute ("oacc kernels",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
  bool is_oacc_kernels_parallelized
    = (lookup_attribute ("oacc kernels parallelized",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);

  /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
     kernels, so remove the parallelism dimensions function attributes
     potentially set earlier on.  */
  if (is_oacc_kernels && !is_oacc_kernels_parallelized)
    {
      oacc_set_fn_attrib (current_function_decl, NULL, NULL);
      attrs = oacc_get_fn_attrib (current_function_decl);
    }

  /* Discover, partition and process the loops.  */
  oacc_loop *loops = oacc_loop_discovery ();
  int fn_level = oacc_fn_attrib_level (attrs);

  if (dump_file)
    {
      if (fn_level >= 0)
	fprintf (dump_file, "Function is OpenACC routine level %d\n",
		 fn_level);
      else if (is_oacc_kernels)
	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
		 (is_oacc_kernels_parallelized
		  ? "parallelized" : "unparallelized"));
      else
	fprintf (dump_file, "Function is OpenACC parallel offload\n");
    }

  /* A routine at level N may not use partitioning at or outside N.  */
  unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
  unsigned used_mask = oacc_loop_partition (loops, outer_mask);
  /* OpenACC kernels constructs are special: they currently don't use the
     generic oacc_loop infrastructure and attribute/dimension processing.  */
  if (is_oacc_kernels && is_oacc_kernels_parallelized)
    {
      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
	 also tree-parloops.c:create_parallel_loop.  */
      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
    }

  int dims[GOMP_DIM_MAX];
  oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);

  if (dump_file)
    {
      const char *comma = "Compute dimensions [";
      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
	fprintf (dump_file, "%s%d", comma, dims[ix]);
      fprintf (dump_file, "]\n");
    }

  oacc_loop_process (loops);
  if (dump_file)
    {
      fprintf (dump_file, "OpenACC loops\n");
      dump_oacc_loop (dump_file, loops, 0);
      fprintf (dump_file, "\n");
    }
  if (dump_enabled_p ())
    {
      oacc_loop *l = loops;
      /* OpenACC kernels constructs are special: they currently don't use the
	 generic oacc_loop infrastructure.  */
      if (is_oacc_kernels)
	{
	  /* Create a fake oacc_loop for diagnostic purposes.  */
	  l = new_oacc_loop_raw (NULL,
				 DECL_SOURCE_LOCATION (current_function_decl));
	  l->mask = used_mask;
	}
      else
	{
	  /* Skip the outermost, dummy OpenACC loop  */
	  l = l->child;
	}
      if (l)
	inform_oacc_loop (l);
      if (is_oacc_kernels)
	free_oacc_loop (l);
    }

  /* Offloaded targets may introduce new basic blocks, which require
     dominance information to update SSA.  */
  calculate_dominance_info (CDI_DOMINATORS);

  /* Now lower internal loop functions to target-specific code
     sequences.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	gcall *call = as_a <gcall *> (stmt);
	if (!gimple_call_internal_p (call))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	/* Rewind to allow rescan.  The transformers below may replace
	   CALL with a sequence of new statements, which we then want
	   to visit on the next iteration.  */
	gsi_prev (&gsi);
	bool rescan = false, remove = false;
	enum internal_fn ifn_code = gimple_call_internal_fn (call);

	switch (ifn_code)
	  {
	  default: break;

	  case IFN_GOACC_TILE:
	    oacc_xform_tile (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_LOOP:
	    oacc_xform_loop (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_REDUCTION:
	    /* Mark the function for SSA renaming.  */
	    mark_virtual_operands_for_renaming (cfun);

	    /* If the level is -1, this ended up being an unused
	       axis.  Handle as a default.  */
	    if (integer_minus_onep (gimple_call_arg (call, 3)))
	      default_goacc_reduction (call);
	    else
	      targetm.goacc.reduction (call);
	    rescan = true;
	    break;

	  case IFN_UNIQUE:
	    {
	      enum ifn_unique_kind kind
		= ((enum ifn_unique_kind)
		   TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

	      switch (kind)
		{
		default:
		  break;

		case IFN_UNIQUE_OACC_FORK:
		case IFN_UNIQUE_OACC_JOIN:
		  /* Level -1 marks an unused axis; otherwise defer to
		     the target hook, deleting if it has no expander.  */
		  if (integer_minus_onep (gimple_call_arg (call, 2)))
		    remove = true;
		  else if (!targetm.goacc.fork_join
			   (call, dims, kind == IFN_UNIQUE_OACC_FORK))
		    remove = true;
		  break;

		case IFN_UNIQUE_OACC_HEAD_MARK:
		case IFN_UNIQUE_OACC_TAIL_MARK:
		  remove = true;
		  break;
		}
	      break;
	    }
	  }

	if (gsi_end_p (gsi))
	  /* We rewound past the beginning of the BB.  */
	  gsi = gsi_start_bb (bb);
	else
	  /* Undo the rewind.  */
	  gsi_next (&gsi);

	if (remove)
	  {
	    if (gimple_vdef (call))
	      replace_uses_by (gimple_vdef (call), gimple_vuse (call));
	    if (gimple_call_lhs (call))
	      {
		/* Propagate the data dependency var.  */
		gimple *ass = gimple_build_assign (gimple_call_lhs (call),
						   gimple_call_arg (call, 1));
		gsi_replace (&gsi, ass, false);
	      }
	    else
	      gsi_remove (&gsi, true);
	  }
	else if (!rescan)
	  /* If not rescanning, advance over the call.  */
	  gsi_next (&gsi);
      }

  free_oacc_loop (loops);

  return 0;
}
1699
1700 /* Default launch dimension validator. Force everything to 1. A
1701 backend that wants to provide larger dimensions must override this
1702 hook. */
1703
1704 bool
1705 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
1706 int ARG_UNUSED (fn_level))
1707 {
1708 bool changed = false;
1709
1710 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
1711 {
1712 if (dims[ix] != 1)
1713 {
1714 dims[ix] = 1;
1715 changed = true;
1716 }
1717 }
1718
1719 return changed;
1720 }
1721
/* Default dimension bound is unknown on accelerator and 1 on host.  */

int
default_goacc_dim_limit (int ARG_UNUSED (axis))
{
#ifdef ACCEL_COMPILER
  /* 0 here means "no compile-time bound known".  */
  return 0;
#else
  /* Host fallback runs everything single-gang/worker/vector.  */
  return 1;
#endif
}
1733
namespace {

/* Pass descriptor for the OpenACC device-lowering pass.  */

const pass_data pass_data_oacc_device_lower =
{
  GIMPLE_PASS, /* type */
  "oaccdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

/* Pass wrapper around execute_oacc_device_lower; gated on OpenACC
   being enabled at all.  */

class pass_oacc_device_lower : public gimple_opt_pass
{
public:
  pass_oacc_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_openacc; };

  virtual unsigned int execute (function *)
    {
      return execute_oacc_device_lower ();
    }

}; // class pass_oacc_device_lower

} // anon namespace

gimple_opt_pass *
make_pass_oacc_device_lower (gcc::context *ctxt)
{
  return new pass_oacc_device_lower (ctxt);
}
1773
1774 \f
/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
   GOMP_SIMT_ENTER call identifying the privatized variables, which are
   turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
   Set *REGIMPLIFY to true, except if no privatized variables were seen.  */

static void
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
{
  gimple *alloc_stmt = gsi_stmt (*gsi);
  tree simtrec = gimple_call_lhs (alloc_stmt);
  tree simduid = gimple_call_arg (alloc_stmt, 0);
  gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
  gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
  /* Build an artificial record type collecting all privatized
     variables as fields; SIMTREC becomes a pointer to it.  */
  tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
  TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
  TREE_ADDRESSABLE (rectype) = 1;
  TREE_TYPE (simtrec) = build_pointer_type (rectype);
  for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
    {
      tree *argp = gimple_call_arg_ptr (enter_stmt, i);
      if (*argp == null_pointer_node)
	continue;
      gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
		  && VAR_P (TREE_OPERAND (*argp, 0)));
      tree var = TREE_OPERAND (*argp, 0);

      /* Create a matching field for VAR, preserving alignment and
	 volatility.  */
      tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
			       DECL_NAME (var), TREE_TYPE (var));
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);

      insert_field_into_struct (rectype, field);

      /* Redirect subsequent accesses to VAR through the record via a
	 DECL_VALUE_EXPR.  */
      tree t = build_simple_mem_ref (simtrec);
      t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
      TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
      SET_DECL_VALUE_EXPR (var, t);
      DECL_HAS_VALUE_EXPR_P (var) = 1;
      *regimplify = true;
    }
  layout_type (rectype);
  tree size = TYPE_SIZE_UNIT (rectype);
  tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));

  /* Replace the ENTER_ALLOC call with one passing the now-known size
     and alignment of the record.  */
  alloc_stmt
    = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
  gimple_call_set_lhs (alloc_stmt, simtrec);
  gsi_replace (gsi, alloc_stmt, false);
  /* The original ENTER call degenerates to a copy of its first
     argument into SIMDUID.  */
  gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
  enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
  gsi_replace (&enter_gsi, enter_stmt, false);

  use_operand_p use;
  gimple *exit_stmt;
  if (single_imm_use (simtrec, &use, &exit_stmt))
    {
      /* Emit a clobber of the record just before the matching
	 SIMT_EXIT, ending its lifetime.  */
      gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
      gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
      tree clobber = build_constructor (rectype, NULL);
      TREE_THIS_VOLATILE (clobber) = 1;
      exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
      gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
    }
  else
    gcc_checking_assert (has_zero_uses (simtrec));
}
1842
1843 /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
1844
1845 static tree
1846 find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
1847 {
1848 tree t = *tp;
1849
1850 if (VAR_P (t)
1851 && DECL_HAS_VALUE_EXPR_P (t)
1852 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
1853 {
1854 *walk_subtrees = 0;
1855 return t;
1856 }
1857 return NULL_TREE;
1858 }
1859
/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
   VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
   LANE is kept to be expanded to RTL later on.  Also cleanup all other SIMT
   internal functions on non-SIMT targets, and likewise some SIMD internal
   functions on SIMT targets.  */

static unsigned int
execute_omp_device_lower ()
{
  /* VF == 1 means "not a SIMT target": fold SIMT IFNs away.  */
  int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
  bool regimplify = false;
  basic_block bb;
  gimple_stmt_iterator gsi;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	/* RHS == NULL_TREE after the switch means "keep the call".  */
	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_USE_SIMT:
	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
	    break;
	  case IFN_GOMP_SIMT_ENTER:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_ENTER_ALLOC:
	    if (vf != 1)
	      ompdevlow_adjust_simt_enter (&gsi, &regimplify);
	    rhs = vf == 1 ? null_pointer_node : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_EXIT:
	  simtreg_enter_exit:
	    /* On SIMT targets these stay for RTL expansion; on others
	       drop their virtual definition and replace below.  */
	    if (vf != 1)
	      continue;
	    unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_LANE:
	  case IFN_GOMP_SIMT_LAST_LANE:
	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMT_VF:
	    rhs = build_int_cst (type, vf);
	    break;
	  case IFN_GOMP_SIMT_ORDERED_PRED:
	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
	    if (rhs || !lhs)
	      unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_VOTE_ANY:
	  case IFN_GOMP_SIMT_XCHG_BFLY:
	  case IFN_GOMP_SIMT_XCHG_IDX:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    /* SIMD placeholders fold the other way: cleaned up on
	       SIMT targets, kept on ordinary ones.  */
	    rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_VF:
	    rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
	    break;
	  default:
	    continue;
	  }
	if (lhs && !rhs)
	  continue;
	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
	gsi_replace (&gsi, stmt, false);
      }
  /* Privatized variables gained DECL_VALUE_EXPRs; regimplify every
     statement that mentions one.  */
  if (regimplify)
    FOR_EACH_BB_REVERSE_FN (bb, cfun)
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
	  {
	    if (gimple_clobber_p (gsi_stmt (gsi)))
	      gsi_remove (&gsi, true);
	    else
	      gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
	  }
  if (vf != 1)
    cfun->has_force_vectorize_loops = false;
  return 0;
}
1946
namespace {

/* Pass descriptor for the OpenMP device-lowering pass.  */

const pass_data pass_data_omp_device_lower =
{
  GIMPLE_PASS, /* type */
  "ompdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lomp_dev, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

/* Pass wrapper around execute_omp_device_lower; runs on any function
   not yet lowered for the device.  */

class pass_omp_device_lower : public gimple_opt_pass
{
public:
  pass_omp_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lomp_dev);
    }
  virtual unsigned int execute (function *)
    {
      return execute_omp_device_lower ();
    }

}; // class pass_omp_device_lower

} // anon namespace

gimple_opt_pass *
make_pass_omp_device_lower (gcc::context *ctxt)
{
  return new pass_omp_device_lower (ctxt);
}
1988
/* "omp declare target link" handling pass.  */

namespace {

/* Pass descriptor for the "omp declare target link" pass.  */

const pass_data pass_data_omp_target_link =
{
  GIMPLE_PASS, /* type */
  "omptargetlink", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_omp_target_link : public gimple_opt_pass
{
public:
  pass_omp_target_link (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_target_link, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *fun)
    {
#ifdef ACCEL_COMPILER
      /* Only relevant when compiling functions for the device.  */
      return offloading_function_p (fun->decl);
#else
      (void) fun;
      return false;
#endif
    }

  virtual unsigned execute (function *);
};
2026
2027 /* Callback for walk_gimple_stmt used to scan for link var operands. */
2028
2029 static tree
2030 find_link_var_op (tree *tp, int *walk_subtrees, void *)
2031 {
2032 tree t = *tp;
2033
2034 if (VAR_P (t)
2035 && DECL_HAS_VALUE_EXPR_P (t)
2036 && is_global_var (t)
2037 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
2038 {
2039 *walk_subtrees = 0;
2040 return t;
2041 }
2042
2043 return NULL_TREE;
2044 }
2045
/* Regimplify every statement mentioning an "omp declare target link"
   variable, so its DECL_VALUE_EXPR substitution takes effect.  */

unsigned
pass_omp_target_link::execute (function *fun)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
	  gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
    }

  return 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_omp_target_link (gcc::context *ctxt)
{
  return new pass_omp_target_link (ctxt);
}