Add tests for print from offload target.
[gcc.git] / libgomp / oacc-parallel.c
1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
2
3 Contributed by Mentor Embedded.
4
5 This file is part of the GNU Offloading and Multi Processing Library
6 (libgomp).
7
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 more details.
17
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
21
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
26
27 /* This file handles OpenACC constructs. */
28
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "gomp-constants.h"
32 #include "oacc-int.h"
33 #ifdef HAVE_INTTYPES_H
34 # include <inttypes.h> /* For PRIu64. */
35 #endif
36 #include <string.h>
37 #include <stdarg.h>
38 #include <assert.h>
39
40
41 /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
42 continue to support the following two legacy values. */
43 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV) == 0,
44 "legacy GOMP_DEVICE_ICV broken");
45 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK)
46 == GOACC_FLAG_HOST_FALLBACK,
47 "legacy GOMP_DEVICE_HOST_FALLBACK broken");
48
49
50 /* Returns the number of mappings associated with the pointer or pset. PSET
51 have three mappings, whereas pointer have two. */
52
53 static int
54 find_pointer (int pos, size_t mapnum, unsigned short *kinds)
55 {
56 if (pos + 1 >= mapnum)
57 return 0;
58
59 unsigned char kind = kinds[pos+1] & 0xff;
60
61 if (kind == GOMP_MAP_TO_PSET)
62 return 3;
63 else if (kind == GOMP_MAP_POINTER)
64 return 2;
65
66 return 0;
67 }
68
69 /* Handle the mapping pair that are presented when a
70 deviceptr clause is used with Fortran. */
71
72 static void
73 handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
74 unsigned short *kinds)
75 {
76 int i;
77
78 for (i = 0; i < mapnum; i++)
79 {
80 unsigned short kind1 = kinds[i] & 0xff;
81
82 /* Handle Fortran deviceptr clause. */
83 if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
84 {
85 unsigned short kind2;
86
87 if (i < (signed)mapnum - 1)
88 kind2 = kinds[i + 1] & 0xff;
89 else
90 kind2 = 0xffff;
91
92 if (sizes[i] == sizeof (void *))
93 continue;
94
95 /* At this point, we're dealing with a Fortran deviceptr.
96 If the next element is not what we're expecting, then
97 this is an instance of where the deviceptr variable was
98 not used within the region and the pointer was removed
99 by the gimplifier. */
100 if (kind2 == GOMP_MAP_POINTER
101 && sizes[i + 1] == 0
102 && hostaddrs[i] == *(void **)hostaddrs[i + 1])
103 {
104 kinds[i+1] = kinds[i];
105 sizes[i+1] = sizeof (void *);
106 }
107
108 /* Invalidate the entry. */
109 hostaddrs[i] = NULL;
110 }
111 }
112 }
113
/* Forward declaration; the definition appears further down in this file,
   after the entry points that call it.  */
static void
goacc_wait (int async, int num_waits, va_list *ap);
115
116
117 /* Launch a possibly offloaded function with FLAGS. FN is the host fn
118 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
119 blocks to be copied to/from the device. Varadic arguments are
120 keyed optional parameters terminated with a zero. */
121
122 void
123 GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
124 size_t mapnum, void **hostaddrs, size_t *sizes,
125 unsigned short *kinds, ...)
126 {
127 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
128
129 va_list ap;
130 struct goacc_thread *thr;
131 struct gomp_device_descr *acc_dev;
132 struct target_mem_desc *tgt;
133 void **devaddrs;
134 unsigned int i;
135 struct splay_tree_key_s k;
136 splay_tree_key tgt_fn_key;
137 void (*tgt_fn);
138 int async = GOMP_ASYNC_SYNC;
139 unsigned dims[GOMP_DIM_MAX];
140 unsigned tag;
141
142 #ifdef HAVE_INTTYPES_H
143 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
144 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
145 #else
146 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
147 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
148 #endif
149 goacc_lazy_initialize ();
150
151 thr = goacc_thread ();
152 acc_dev = thr->dev;
153
154 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
155
156 acc_prof_info prof_info;
157 if (profiling_p)
158 {
159 thr->prof_info = &prof_info;
160
161 prof_info.event_type = acc_ev_compute_construct_start;
162 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
163 prof_info.version = _ACC_PROF_INFO_VERSION;
164 prof_info.device_type = acc_device_type (acc_dev->type);
165 prof_info.device_number = acc_dev->target_id;
166 prof_info.thread_id = -1;
167 prof_info.async = async;
168 prof_info.async_queue = prof_info.async;
169 prof_info.src_file = NULL;
170 prof_info.func_name = NULL;
171 prof_info.line_no = -1;
172 prof_info.end_line_no = -1;
173 prof_info.func_line_no = -1;
174 prof_info.func_end_line_no = -1;
175 }
176 acc_event_info compute_construct_event_info;
177 if (profiling_p)
178 {
179 compute_construct_event_info.other_event.event_type
180 = prof_info.event_type;
181 compute_construct_event_info.other_event.valid_bytes
182 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
183 compute_construct_event_info.other_event.parent_construct
184 = acc_construct_parallel;
185 compute_construct_event_info.other_event.implicit = 0;
186 compute_construct_event_info.other_event.tool_info = NULL;
187 }
188 acc_api_info api_info;
189 if (profiling_p)
190 {
191 thr->api_info = &api_info;
192
193 api_info.device_api = acc_device_api_none;
194 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
195 api_info.device_type = prof_info.device_type;
196 api_info.vendor = -1;
197 api_info.device_handle = NULL;
198 api_info.context_handle = NULL;
199 api_info.async_handle = NULL;
200 }
201
202 if (profiling_p)
203 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
204 &api_info);
205
206 handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
207
208 /* Host fallback if "if" clause is false or if the current device is set to
209 the host. */
210 if (flags & GOACC_FLAG_HOST_FALLBACK)
211 {
212 prof_info.device_type = acc_device_host;
213 api_info.device_type = prof_info.device_type;
214 goacc_save_and_set_bind (acc_device_host);
215 fn (hostaddrs);
216 goacc_restore_bind ();
217 goto out_prof;
218 }
219 else if (acc_device_type (acc_dev->type) == acc_device_host)
220 {
221 fn (hostaddrs);
222 goto out_prof;
223 }
224
225 /* Default: let the runtime choose. */
226 for (i = 0; i != GOMP_DIM_MAX; i++)
227 dims[i] = 0;
228
229 va_start (ap, kinds);
230 /* TODO: This will need amending when device_type is implemented. */
231 while ((tag = va_arg (ap, unsigned)) != 0)
232 {
233 if (GOMP_LAUNCH_DEVICE (tag))
234 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
235 GOMP_LAUNCH_DEVICE (tag));
236
237 switch (GOMP_LAUNCH_CODE (tag))
238 {
239 case GOMP_LAUNCH_DIM:
240 {
241 unsigned mask = GOMP_LAUNCH_OP (tag);
242
243 for (i = 0; i != GOMP_DIM_MAX; i++)
244 if (mask & GOMP_DIM_MASK (i))
245 dims[i] = va_arg (ap, unsigned);
246 }
247 break;
248
249 case GOMP_LAUNCH_ASYNC:
250 {
251 /* Small constant values are encoded in the operand. */
252 async = GOMP_LAUNCH_OP (tag);
253
254 if (async == GOMP_LAUNCH_OP_MAX)
255 async = va_arg (ap, unsigned);
256
257 if (profiling_p)
258 {
259 prof_info.async = async;
260 prof_info.async_queue = prof_info.async;
261 }
262
263 break;
264 }
265
266 case GOMP_LAUNCH_WAIT:
267 {
268 unsigned num_waits = GOMP_LAUNCH_OP (tag);
269 goacc_wait (async, num_waits, &ap);
270 break;
271 }
272
273 default:
274 gomp_fatal ("unrecognized offload code '%d',"
275 " libgomp is too old", GOMP_LAUNCH_CODE (tag));
276 }
277 }
278 va_end (ap);
279
280 if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
281 {
282 k.host_start = (uintptr_t) fn;
283 k.host_end = k.host_start + 1;
284 gomp_mutex_lock (&acc_dev->lock);
285 tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
286 gomp_mutex_unlock (&acc_dev->lock);
287
288 if (tgt_fn_key == NULL)
289 gomp_fatal ("target function wasn't mapped");
290
291 tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
292 }
293 else
294 tgt_fn = (void (*)) fn;
295
296 acc_event_info enter_exit_data_event_info;
297 if (profiling_p)
298 {
299 prof_info.event_type = acc_ev_enter_data_start;
300 enter_exit_data_event_info.other_event.event_type
301 = prof_info.event_type;
302 enter_exit_data_event_info.other_event.valid_bytes
303 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
304 enter_exit_data_event_info.other_event.parent_construct
305 = compute_construct_event_info.other_event.parent_construct;
306 enter_exit_data_event_info.other_event.implicit = 1;
307 enter_exit_data_event_info.other_event.tool_info = NULL;
308 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
309 &api_info);
310 }
311
312 goacc_aq aq = get_goacc_asyncqueue (async);
313
314 tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
315 true, GOMP_MAP_VARS_OPENACC);
316 if (profiling_p)
317 {
318 prof_info.event_type = acc_ev_enter_data_end;
319 enter_exit_data_event_info.other_event.event_type
320 = prof_info.event_type;
321 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
322 &api_info);
323 }
324
325 devaddrs = gomp_alloca (sizeof (void *) * mapnum);
326 for (i = 0; i < mapnum; i++)
327 if (tgt->list[i].key != NULL)
328 devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
329 + tgt->list[i].key->tgt_offset
330 + tgt->list[i].offset);
331 else
332 devaddrs[i] = NULL;
333 if (aq == NULL)
334 acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
335 tgt);
336 else
337 acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
338 dims, tgt, aq);
339
340 if (profiling_p)
341 {
342 prof_info.event_type = acc_ev_exit_data_start;
343 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
344 enter_exit_data_event_info.other_event.tool_info = NULL;
345 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
346 &api_info);
347 }
348
349 /* If running synchronously, unmap immediately. */
350 if (aq == NULL)
351 gomp_unmap_vars (tgt, true);
352 else
353 gomp_unmap_vars_async (tgt, true, aq);
354
355 if (profiling_p)
356 {
357 prof_info.event_type = acc_ev_exit_data_end;
358 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
359 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
360 &api_info);
361 }
362
363 out_prof:
364 if (profiling_p)
365 {
366 prof_info.event_type = acc_ev_compute_construct_end;
367 compute_construct_event_info.other_event.event_type
368 = prof_info.event_type;
369 goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
370 &api_info);
371
372 thr->prof_info = NULL;
373 thr->api_info = NULL;
374 }
375 }
376
377 /* Legacy entry point (GCC 5). Only provide host fallback execution. */
378
379 void
380 GOACC_parallel (int flags_m, void (*fn) (void *),
381 size_t mapnum, void **hostaddrs, size_t *sizes,
382 unsigned short *kinds,
383 int num_gangs, int num_workers, int vector_length,
384 int async, int num_waits, ...)
385 {
386 goacc_save_and_set_bind (acc_device_host);
387 fn (hostaddrs);
388 goacc_restore_bind ();
389 }
390
391 void
392 GOACC_data_start (int flags_m, size_t mapnum,
393 void **hostaddrs, size_t *sizes, unsigned short *kinds)
394 {
395 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
396
397 struct target_mem_desc *tgt;
398
399 #ifdef HAVE_INTTYPES_H
400 gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
401 __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
402 #else
403 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
404 __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
405 #endif
406
407 goacc_lazy_initialize ();
408
409 struct goacc_thread *thr = goacc_thread ();
410 struct gomp_device_descr *acc_dev = thr->dev;
411
412 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
413
414 acc_prof_info prof_info;
415 if (profiling_p)
416 {
417 thr->prof_info = &prof_info;
418
419 prof_info.event_type = acc_ev_enter_data_start;
420 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
421 prof_info.version = _ACC_PROF_INFO_VERSION;
422 prof_info.device_type = acc_device_type (acc_dev->type);
423 prof_info.device_number = acc_dev->target_id;
424 prof_info.thread_id = -1;
425 prof_info.async = acc_async_sync; /* Always synchronous. */
426 prof_info.async_queue = prof_info.async;
427 prof_info.src_file = NULL;
428 prof_info.func_name = NULL;
429 prof_info.line_no = -1;
430 prof_info.end_line_no = -1;
431 prof_info.func_line_no = -1;
432 prof_info.func_end_line_no = -1;
433 }
434 acc_event_info enter_data_event_info;
435 if (profiling_p)
436 {
437 enter_data_event_info.other_event.event_type
438 = prof_info.event_type;
439 enter_data_event_info.other_event.valid_bytes
440 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
441 enter_data_event_info.other_event.parent_construct = acc_construct_data;
442 for (int i = 0; i < mapnum; ++i)
443 if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR)
444 {
445 /* If there is one such data mapping kind, then this is actually an
446 OpenACC 'host_data' construct. (GCC maps the OpenACC
447 'host_data' construct to the OpenACC 'data' construct.) Apart
448 from artificial test cases (such as an OpenACC 'host_data'
449 construct's (implicit) device initialization when there hasn't
450 been any device data be set up before...), there can't really
451 any meaningful events be generated from OpenACC 'host_data'
452 constructs, though. */
453 enter_data_event_info.other_event.parent_construct
454 = acc_construct_host_data;
455 break;
456 }
457 enter_data_event_info.other_event.implicit = 0;
458 enter_data_event_info.other_event.tool_info = NULL;
459 }
460 acc_api_info api_info;
461 if (profiling_p)
462 {
463 thr->api_info = &api_info;
464
465 api_info.device_api = acc_device_api_none;
466 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
467 api_info.device_type = prof_info.device_type;
468 api_info.vendor = -1;
469 api_info.device_handle = NULL;
470 api_info.context_handle = NULL;
471 api_info.async_handle = NULL;
472 }
473
474 if (profiling_p)
475 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
476
477 /* Host fallback or 'do nothing'. */
478 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
479 || (flags & GOACC_FLAG_HOST_FALLBACK))
480 {
481 prof_info.device_type = acc_device_host;
482 api_info.device_type = prof_info.device_type;
483 tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
484 GOMP_MAP_VARS_OPENACC);
485 tgt->prev = thr->mapped_data;
486 thr->mapped_data = tgt;
487
488 goto out_prof;
489 }
490
491 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
492 tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
493 GOMP_MAP_VARS_OPENACC);
494 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
495 tgt->prev = thr->mapped_data;
496 thr->mapped_data = tgt;
497
498 out_prof:
499 if (profiling_p)
500 {
501 prof_info.event_type = acc_ev_enter_data_end;
502 enter_data_event_info.other_event.event_type = prof_info.event_type;
503 goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
504
505 thr->prof_info = NULL;
506 thr->api_info = NULL;
507 }
508 }
509
510 void
511 GOACC_data_end (void)
512 {
513 struct goacc_thread *thr = goacc_thread ();
514 struct gomp_device_descr *acc_dev = thr->dev;
515 struct target_mem_desc *tgt = thr->mapped_data;
516
517 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
518
519 acc_prof_info prof_info;
520 if (profiling_p)
521 {
522 thr->prof_info = &prof_info;
523
524 prof_info.event_type = acc_ev_exit_data_start;
525 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
526 prof_info.version = _ACC_PROF_INFO_VERSION;
527 prof_info.device_type = acc_device_type (acc_dev->type);
528 prof_info.device_number = acc_dev->target_id;
529 prof_info.thread_id = -1;
530 prof_info.async = acc_async_sync; /* Always synchronous. */
531 prof_info.async_queue = prof_info.async;
532 prof_info.src_file = NULL;
533 prof_info.func_name = NULL;
534 prof_info.line_no = -1;
535 prof_info.end_line_no = -1;
536 prof_info.func_line_no = -1;
537 prof_info.func_end_line_no = -1;
538 }
539 acc_event_info exit_data_event_info;
540 if (profiling_p)
541 {
542 exit_data_event_info.other_event.event_type
543 = prof_info.event_type;
544 exit_data_event_info.other_event.valid_bytes
545 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
546 exit_data_event_info.other_event.parent_construct = acc_construct_data;
547 exit_data_event_info.other_event.implicit = 0;
548 exit_data_event_info.other_event.tool_info = NULL;
549 }
550 acc_api_info api_info;
551 if (profiling_p)
552 {
553 thr->api_info = &api_info;
554
555 api_info.device_api = acc_device_api_none;
556 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
557 api_info.device_type = prof_info.device_type;
558 api_info.vendor = -1;
559 api_info.device_handle = NULL;
560 api_info.context_handle = NULL;
561 api_info.async_handle = NULL;
562 }
563
564 if (profiling_p)
565 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
566
567 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
568 thr->mapped_data = tgt->prev;
569 gomp_unmap_vars (tgt, true);
570 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
571
572 if (profiling_p)
573 {
574 prof_info.event_type = acc_ev_exit_data_end;
575 exit_data_event_info.other_event.event_type = prof_info.event_type;
576 goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
577
578 thr->prof_info = NULL;
579 thr->api_info = NULL;
580 }
581 }
582
583 void
584 GOACC_enter_exit_data (int flags_m, size_t mapnum,
585 void **hostaddrs, size_t *sizes, unsigned short *kinds,
586 int async, int num_waits, ...)
587 {
588 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
589
590 struct goacc_thread *thr;
591 struct gomp_device_descr *acc_dev;
592 bool data_enter = false;
593 size_t i;
594
595 goacc_lazy_initialize ();
596
597 thr = goacc_thread ();
598 acc_dev = thr->dev;
599
600 /* Determine whether "finalize" semantics apply to all mappings of this
601 OpenACC directive. */
602 bool finalize = false;
603 if (mapnum > 0)
604 {
605 unsigned char kind = kinds[0] & 0xff;
606 if (kind == GOMP_MAP_DELETE
607 || kind == GOMP_MAP_FORCE_FROM)
608 finalize = true;
609 }
610
611 /* Determine if this is an "acc enter data". */
612 for (i = 0; i < mapnum; ++i)
613 {
614 unsigned char kind = kinds[i] & 0xff;
615
616 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
617 continue;
618
619 if (kind == GOMP_MAP_FORCE_ALLOC
620 || kind == GOMP_MAP_FORCE_PRESENT
621 || kind == GOMP_MAP_FORCE_TO
622 || kind == GOMP_MAP_TO
623 || kind == GOMP_MAP_ALLOC)
624 {
625 data_enter = true;
626 break;
627 }
628
629 if (kind == GOMP_MAP_RELEASE
630 || kind == GOMP_MAP_DELETE
631 || kind == GOMP_MAP_FROM
632 || kind == GOMP_MAP_FORCE_FROM)
633 break;
634
635 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
636 kind);
637 }
638
639 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
640
641 acc_prof_info prof_info;
642 if (profiling_p)
643 {
644 thr->prof_info = &prof_info;
645
646 prof_info.event_type
647 = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
648 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
649 prof_info.version = _ACC_PROF_INFO_VERSION;
650 prof_info.device_type = acc_device_type (acc_dev->type);
651 prof_info.device_number = acc_dev->target_id;
652 prof_info.thread_id = -1;
653 prof_info.async = async;
654 prof_info.async_queue = prof_info.async;
655 prof_info.src_file = NULL;
656 prof_info.func_name = NULL;
657 prof_info.line_no = -1;
658 prof_info.end_line_no = -1;
659 prof_info.func_line_no = -1;
660 prof_info.func_end_line_no = -1;
661 }
662 acc_event_info enter_exit_data_event_info;
663 if (profiling_p)
664 {
665 enter_exit_data_event_info.other_event.event_type
666 = prof_info.event_type;
667 enter_exit_data_event_info.other_event.valid_bytes
668 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
669 enter_exit_data_event_info.other_event.parent_construct
670 = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
671 enter_exit_data_event_info.other_event.implicit = 0;
672 enter_exit_data_event_info.other_event.tool_info = NULL;
673 }
674 acc_api_info api_info;
675 if (profiling_p)
676 {
677 thr->api_info = &api_info;
678
679 api_info.device_api = acc_device_api_none;
680 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
681 api_info.device_type = prof_info.device_type;
682 api_info.vendor = -1;
683 api_info.device_handle = NULL;
684 api_info.context_handle = NULL;
685 api_info.async_handle = NULL;
686 }
687
688 if (profiling_p)
689 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
690 &api_info);
691
692 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
693 || (flags & GOACC_FLAG_HOST_FALLBACK))
694 {
695 prof_info.device_type = acc_device_host;
696 api_info.device_type = prof_info.device_type;
697
698 goto out_prof;
699 }
700
701 if (num_waits)
702 {
703 va_list ap;
704
705 va_start (ap, num_waits);
706 goacc_wait (async, num_waits, &ap);
707 va_end (ap);
708 }
709
710 /* In c, non-pointers and arrays are represented by a single data clause.
711 Dynamically allocated arrays and subarrays are represented by a data
712 clause followed by an internal GOMP_MAP_POINTER.
713
714 In fortran, scalars and not allocated arrays are represented by a
715 single data clause. Allocated arrays and subarrays have three mappings:
716 1) the original data clause, 2) a PSET 3) a pointer to the array data.
717 */
718
719 if (data_enter)
720 {
721 for (i = 0; i < mapnum; i++)
722 {
723 unsigned char kind = kinds[i] & 0xff;
724
725 /* Scan for pointers and PSETs. */
726 int pointer = find_pointer (i, mapnum, kinds);
727
728 if (!pointer)
729 {
730 switch (kind)
731 {
732 case GOMP_MAP_ALLOC:
733 case GOMP_MAP_FORCE_ALLOC:
734 acc_create_async (hostaddrs[i], sizes[i], async);
735 break;
736 case GOMP_MAP_TO:
737 case GOMP_MAP_FORCE_TO:
738 acc_copyin_async (hostaddrs[i], sizes[i], async);
739 break;
740 default:
741 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
742 kind);
743 break;
744 }
745 }
746 else
747 {
748 gomp_acc_insert_pointer (pointer, &hostaddrs[i],
749 &sizes[i], &kinds[i], async);
750 /* Increment 'i' by two because OpenACC requires fortran
751 arrays to be contiguous, so each PSET is associated with
752 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
753 one MAP_POINTER. */
754 i += pointer - 1;
755 }
756 }
757 }
758 else
759 for (i = 0; i < mapnum; ++i)
760 {
761 unsigned char kind = kinds[i] & 0xff;
762
763 int pointer = find_pointer (i, mapnum, kinds);
764
765 if (!pointer)
766 {
767 switch (kind)
768 {
769 case GOMP_MAP_RELEASE:
770 case GOMP_MAP_DELETE:
771 if (acc_is_present (hostaddrs[i], sizes[i]))
772 {
773 if (finalize)
774 acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
775 else
776 acc_delete_async (hostaddrs[i], sizes[i], async);
777 }
778 break;
779 case GOMP_MAP_FROM:
780 case GOMP_MAP_FORCE_FROM:
781 if (finalize)
782 acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
783 else
784 acc_copyout_async (hostaddrs[i], sizes[i], async);
785 break;
786 default:
787 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
788 kind);
789 break;
790 }
791 }
792 else
793 {
794 bool copyfrom = (kind == GOMP_MAP_FORCE_FROM
795 || kind == GOMP_MAP_FROM);
796 gomp_acc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async,
797 finalize, pointer);
798 /* See the above comment. */
799 i += pointer - 1;
800 }
801 }
802
803 out_prof:
804 if (profiling_p)
805 {
806 prof_info.event_type
807 = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
808 enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
809 goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
810 &api_info);
811
812 thr->prof_info = NULL;
813 thr->api_info = NULL;
814 }
815 }
816
817 static void
818 goacc_wait (int async, int num_waits, va_list *ap)
819 {
820 while (num_waits--)
821 {
822 int qid = va_arg (*ap, int);
823
824 /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
825 if (qid == acc_async_noval)
826 {
827 if (async == acc_async_sync)
828 acc_wait_all ();
829 else
830 acc_wait_all_async (async);
831 break;
832 }
833
834 if (acc_async_test (qid))
835 continue;
836
837 if (async == acc_async_sync)
838 acc_wait (qid);
839 else if (qid == async)
840 /* If we're waiting on the same asynchronous queue as we're
841 launching on, the queue itself will order work as
842 required, so there's no need to wait explicitly. */
843 ;
844 else
845 acc_wait_async (qid, async);
846 }
847 }
848
849 void
850 GOACC_update (int flags_m, size_t mapnum,
851 void **hostaddrs, size_t *sizes, unsigned short *kinds,
852 int async, int num_waits, ...)
853 {
854 int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
855
856 size_t i;
857
858 goacc_lazy_initialize ();
859
860 struct goacc_thread *thr = goacc_thread ();
861 struct gomp_device_descr *acc_dev = thr->dev;
862
863 bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
864
865 acc_prof_info prof_info;
866 if (profiling_p)
867 {
868 thr->prof_info = &prof_info;
869
870 prof_info.event_type = acc_ev_update_start;
871 prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
872 prof_info.version = _ACC_PROF_INFO_VERSION;
873 prof_info.device_type = acc_device_type (acc_dev->type);
874 prof_info.device_number = acc_dev->target_id;
875 prof_info.thread_id = -1;
876 prof_info.async = async;
877 prof_info.async_queue = prof_info.async;
878 prof_info.src_file = NULL;
879 prof_info.func_name = NULL;
880 prof_info.line_no = -1;
881 prof_info.end_line_no = -1;
882 prof_info.func_line_no = -1;
883 prof_info.func_end_line_no = -1;
884 }
885 acc_event_info update_event_info;
886 if (profiling_p)
887 {
888 update_event_info.other_event.event_type
889 = prof_info.event_type;
890 update_event_info.other_event.valid_bytes
891 = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
892 update_event_info.other_event.parent_construct = acc_construct_update;
893 update_event_info.other_event.implicit = 0;
894 update_event_info.other_event.tool_info = NULL;
895 }
896 acc_api_info api_info;
897 if (profiling_p)
898 {
899 thr->api_info = &api_info;
900
901 api_info.device_api = acc_device_api_none;
902 api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
903 api_info.device_type = prof_info.device_type;
904 api_info.vendor = -1;
905 api_info.device_handle = NULL;
906 api_info.context_handle = NULL;
907 api_info.async_handle = NULL;
908 }
909
910 if (profiling_p)
911 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
912
913 if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
914 || (flags & GOACC_FLAG_HOST_FALLBACK))
915 {
916 prof_info.device_type = acc_device_host;
917 api_info.device_type = prof_info.device_type;
918
919 goto out_prof;
920 }
921
922 if (num_waits)
923 {
924 va_list ap;
925
926 va_start (ap, num_waits);
927 goacc_wait (async, num_waits, &ap);
928 va_end (ap);
929 }
930
931 bool update_device = false;
932 for (i = 0; i < mapnum; ++i)
933 {
934 unsigned char kind = kinds[i] & 0xff;
935
936 switch (kind)
937 {
938 case GOMP_MAP_POINTER:
939 case GOMP_MAP_TO_PSET:
940 break;
941
942 case GOMP_MAP_ALWAYS_POINTER:
943 if (update_device)
944 {
945 /* Save the contents of the host pointer. */
946 void *dptr = acc_deviceptr (hostaddrs[i-1]);
947 uintptr_t t = *(uintptr_t *) hostaddrs[i];
948
949 /* Update the contents of the host pointer to reflect
950 the value of the allocated device memory in the
951 previous pointer. */
952 *(uintptr_t *) hostaddrs[i] = (uintptr_t)dptr;
953 /* TODO: verify that we really cannot use acc_update_device_async
954 here. */
955 acc_update_device (hostaddrs[i], sizeof (uintptr_t));
956
957 /* Restore the host pointer. */
958 *(uintptr_t *) hostaddrs[i] = t;
959 update_device = false;
960 }
961 break;
962
963 case GOMP_MAP_TO:
964 if (!acc_is_present (hostaddrs[i], sizes[i]))
965 {
966 update_device = false;
967 break;
968 }
969 /* Fallthru */
970 case GOMP_MAP_FORCE_TO:
971 update_device = true;
972 acc_update_device_async (hostaddrs[i], sizes[i], async);
973 break;
974
975 case GOMP_MAP_FROM:
976 if (!acc_is_present (hostaddrs[i], sizes[i]))
977 {
978 update_device = false;
979 break;
980 }
981 /* Fallthru */
982 case GOMP_MAP_FORCE_FROM:
983 update_device = false;
984 acc_update_self_async (hostaddrs[i], sizes[i], async);
985 break;
986
987 default:
988 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
989 break;
990 }
991 }
992
993 out_prof:
994 if (profiling_p)
995 {
996 prof_info.event_type = acc_ev_update_end;
997 update_event_info.other_event.event_type = prof_info.event_type;
998 goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
999
1000 thr->prof_info = NULL;
1001 thr->api_info = NULL;
1002 }
1003 }
1004
1005 void
1006 GOACC_wait (int async, int num_waits, ...)
1007 {
1008 goacc_lazy_initialize ();
1009
1010 struct goacc_thread *thr = goacc_thread ();
1011
1012 /* No nesting. */
1013 assert (thr->prof_info == NULL);
1014 assert (thr->api_info == NULL);
1015 acc_prof_info prof_info;
1016 acc_api_info api_info;
1017 bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
1018 if (profiling_p)
1019 {
1020 prof_info.async = async;
1021 prof_info.async_queue = prof_info.async;
1022 }
1023
1024 if (num_waits)
1025 {
1026 va_list ap;
1027
1028 va_start (ap, num_waits);
1029 goacc_wait (async, num_waits, &ap);
1030 va_end (ap);
1031 }
1032 else if (async == acc_async_sync)
1033 acc_wait_all ();
1034 else
1035 acc_wait_all_async (async);
1036
1037 if (profiling_p)
1038 {
1039 thr->prof_info = NULL;
1040 thr->api_info = NULL;
1041 }
1042 }
1043
/* Legacy entry point (GCC 5).  Always reports a thread count of one.  */

int
GOACC_get_num_threads (void)
{
  const int legacy_num_threads = 1;

  return legacy_num_threads;
}
1051
/* Legacy entry point (GCC 5).  Always reports thread number zero.  */

int
GOACC_get_thread_num (void)
{
  const int legacy_thread_num = 0;

  return legacy_thread_num;
}
1059
1060 void
1061 GOACC_declare (int flags_m, size_t mapnum,
1062 void **hostaddrs, size_t *sizes, unsigned short *kinds)
1063 {
1064 int i;
1065
1066 for (i = 0; i < mapnum; i++)
1067 {
1068 unsigned char kind = kinds[i] & 0xff;
1069
1070 if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
1071 continue;
1072
1073 switch (kind)
1074 {
1075 case GOMP_MAP_FORCE_ALLOC:
1076 case GOMP_MAP_FORCE_FROM:
1077 case GOMP_MAP_FORCE_TO:
1078 case GOMP_MAP_POINTER:
1079 case GOMP_MAP_RELEASE:
1080 case GOMP_MAP_DELETE:
1081 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1082 &kinds[i], GOMP_ASYNC_SYNC, 0);
1083 break;
1084
1085 case GOMP_MAP_FORCE_DEVICEPTR:
1086 break;
1087
1088 case GOMP_MAP_ALLOC:
1089 if (!acc_is_present (hostaddrs[i], sizes[i]))
1090 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1091 &kinds[i], GOMP_ASYNC_SYNC, 0);
1092 break;
1093
1094 case GOMP_MAP_TO:
1095 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1096 &kinds[i], GOMP_ASYNC_SYNC, 0);
1097
1098 break;
1099
1100 case GOMP_MAP_FROM:
1101 GOACC_enter_exit_data (flags_m, 1, &hostaddrs[i], &sizes[i],
1102 &kinds[i], GOMP_ASYNC_SYNC, 0);
1103 break;
1104
1105 case GOMP_MAP_FORCE_PRESENT:
1106 if (!acc_is_present (hostaddrs[i], sizes[i]))
1107 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
1108 (unsigned long) sizes[i]);
1109 break;
1110
1111 default:
1112 assert (0);
1113 break;
1114 }
1115 }
1116 }