1 /* Copyright (C) 2013-2019 Free Software Foundation, Inc.
3 Contributed by Mentor Embedded.
5 This file is part of the GNU Offloading and Multi Processing Library
8 Libgomp is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 Under Section 7 of GPL version 3, you are granted additional
19 permissions described in the GCC Runtime Library Exception, version
20 3.1, as published by the Free Software Foundation.
22 You should have received a copy of the GNU General Public License and
23 a copy of the GCC Runtime Library Exception along with this program;
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 <http://www.gnu.org/licenses/>. */
27 /* This file handles OpenACC constructs. */
31 #include "gomp-constants.h"
33 #ifdef HAVE_INTTYPES_H
34 # include <inttypes.h> /* For PRIu64. */
41 /* In the ABI, the GOACC_FLAGs are encoded as an inverted bitmask, so that we
42 continue to support the following two legacy values. */
43 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_ICV
) == 0,
44 "legacy GOMP_DEVICE_ICV broken");
45 _Static_assert (GOACC_FLAGS_UNMARSHAL (GOMP_DEVICE_HOST_FALLBACK
)
46 == GOACC_FLAG_HOST_FALLBACK
,
47 "legacy GOMP_DEVICE_HOST_FALLBACK broken");
50 /* Returns the number of mappings associated with the pointer or PSET. A PSET
51 has three mappings, whereas a pointer has two. */
54 find_pointer (int pos
, size_t mapnum
, unsigned short *kinds
)
56 if (pos
+ 1 >= mapnum
)
59 unsigned char kind
= kinds
[pos
+1] & 0xff;
61 if (kind
== GOMP_MAP_TO_PSET
)
63 else if (kind
== GOMP_MAP_POINTER
)
69 /* Handle the mapping pairs that are presented when a
70 deviceptr clause is used with Fortran. */
73 handle_ftn_pointers (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
74 unsigned short *kinds
)
78 for (i
= 0; i
< mapnum
; i
++)
80 unsigned short kind1
= kinds
[i
] & 0xff;
82 /* Handle Fortran deviceptr clause. */
83 if (kind1
== GOMP_MAP_FORCE_DEVICEPTR
)
87 if (i
< (signed)mapnum
- 1)
88 kind2
= kinds
[i
+ 1] & 0xff;
92 if (sizes
[i
] == sizeof (void *))
95 /* At this point, we're dealing with a Fortran deviceptr.
96 If the next element is not what we're expecting, then
97 this is a case where the deviceptr variable was
98 not used within the region and the pointer was removed. */
100 if (kind2
== GOMP_MAP_POINTER
102 && hostaddrs
[i
] == *(void **)hostaddrs
[i
+ 1])
104 kinds
[i
+1] = kinds
[i
];
105 sizes
[i
+1] = sizeof (void *);
108 /* Invalidate the entry. */
114 static void goacc_wait (int async
, int num_waits
, va_list *ap
);
117 /* Launch a possibly offloaded function with FLAGS. FN is the host fn
118 address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory
119 blocks to be copied to/from the device. Variadic arguments are
120 keyed optional parameters terminated with a zero. */
123 GOACC_parallel_keyed (int flags_m
, void (*fn
) (void *),
124 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
125 unsigned short *kinds
, ...)
127 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
130 struct goacc_thread
*thr
;
131 struct gomp_device_descr
*acc_dev
;
132 struct target_mem_desc
*tgt
;
135 struct splay_tree_key_s k
;
136 splay_tree_key tgt_fn_key
;
138 int async
= GOMP_ASYNC_SYNC
;
139 unsigned dims
[GOMP_DIM_MAX
];
142 #ifdef HAVE_INTTYPES_H
143 gomp_debug (0, "%s: mapnum=%"PRIu64
", hostaddrs=%p, size=%p, kinds=%p\n",
144 __FUNCTION__
, (uint64_t) mapnum
, hostaddrs
, sizes
, kinds
);
146 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
147 __FUNCTION__
, (unsigned long) mapnum
, hostaddrs
, sizes
, kinds
);
149 goacc_lazy_initialize ();
151 thr
= goacc_thread ();
154 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
156 acc_prof_info prof_info
;
159 thr
->prof_info
= &prof_info
;
161 prof_info
.event_type
= acc_ev_compute_construct_start
;
162 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
163 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
164 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
165 prof_info
.device_number
= acc_dev
->target_id
;
166 prof_info
.thread_id
= -1;
167 prof_info
.async
= async
;
168 prof_info
.async_queue
= prof_info
.async
;
169 prof_info
.src_file
= NULL
;
170 prof_info
.func_name
= NULL
;
171 prof_info
.line_no
= -1;
172 prof_info
.end_line_no
= -1;
173 prof_info
.func_line_no
= -1;
174 prof_info
.func_end_line_no
= -1;
176 acc_event_info compute_construct_event_info
;
179 compute_construct_event_info
.other_event
.event_type
180 = prof_info
.event_type
;
181 compute_construct_event_info
.other_event
.valid_bytes
182 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
183 compute_construct_event_info
.other_event
.parent_construct
184 = acc_construct_parallel
;
185 compute_construct_event_info
.other_event
.implicit
= 0;
186 compute_construct_event_info
.other_event
.tool_info
= NULL
;
188 acc_api_info api_info
;
191 thr
->api_info
= &api_info
;
193 api_info
.device_api
= acc_device_api_none
;
194 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
195 api_info
.device_type
= prof_info
.device_type
;
196 api_info
.vendor
= -1;
197 api_info
.device_handle
= NULL
;
198 api_info
.context_handle
= NULL
;
199 api_info
.async_handle
= NULL
;
203 goacc_profiling_dispatch (&prof_info
, &compute_construct_event_info
,
206 handle_ftn_pointers (mapnum
, hostaddrs
, sizes
, kinds
);
208 /* Host fallback if "if" clause is false or if the current device is set to the host. */
210 if (flags
& GOACC_FLAG_HOST_FALLBACK
)
212 prof_info
.device_type
= acc_device_host
;
213 api_info
.device_type
= prof_info
.device_type
;
214 goacc_save_and_set_bind (acc_device_host
);
216 goacc_restore_bind ();
219 else if (acc_device_type (acc_dev
->type
) == acc_device_host
)
225 /* Default: let the runtime choose. */
226 for (i
= 0; i
!= GOMP_DIM_MAX
; i
++)
229 va_start (ap
, kinds
);
230 /* TODO: This will need amending when device_type is implemented. */
231 while ((tag
= va_arg (ap
, unsigned)) != 0)
233 if (GOMP_LAUNCH_DEVICE (tag
))
234 gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
235 GOMP_LAUNCH_DEVICE (tag
));
237 switch (GOMP_LAUNCH_CODE (tag
))
239 case GOMP_LAUNCH_DIM
:
241 unsigned mask
= GOMP_LAUNCH_OP (tag
);
243 for (i
= 0; i
!= GOMP_DIM_MAX
; i
++)
244 if (mask
& GOMP_DIM_MASK (i
))
245 dims
[i
] = va_arg (ap
, unsigned);
249 case GOMP_LAUNCH_ASYNC
:
251 /* Small constant values are encoded in the operand. */
252 async
= GOMP_LAUNCH_OP (tag
);
254 if (async
== GOMP_LAUNCH_OP_MAX
)
255 async
= va_arg (ap
, unsigned);
259 prof_info
.async
= async
;
260 prof_info
.async_queue
= prof_info
.async
;
266 case GOMP_LAUNCH_WAIT
:
268 unsigned num_waits
= GOMP_LAUNCH_OP (tag
);
269 goacc_wait (async
, num_waits
, &ap
);
274 gomp_fatal ("unrecognized offload code '%d',"
275 " libgomp is too old", GOMP_LAUNCH_CODE (tag
));
280 if (!(acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_NATIVE_EXEC
))
282 k
.host_start
= (uintptr_t) fn
;
283 k
.host_end
= k
.host_start
+ 1;
284 gomp_mutex_lock (&acc_dev
->lock
);
285 tgt_fn_key
= splay_tree_lookup (&acc_dev
->mem_map
, &k
);
286 gomp_mutex_unlock (&acc_dev
->lock
);
288 if (tgt_fn_key
== NULL
)
289 gomp_fatal ("target function wasn't mapped");
291 tgt_fn
= (void (*)) tgt_fn_key
->tgt_offset
;
294 tgt_fn
= (void (*)) fn
;
296 acc_event_info enter_exit_data_event_info
;
299 prof_info
.event_type
= acc_ev_enter_data_start
;
300 enter_exit_data_event_info
.other_event
.event_type
301 = prof_info
.event_type
;
302 enter_exit_data_event_info
.other_event
.valid_bytes
303 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
304 enter_exit_data_event_info
.other_event
.parent_construct
305 = compute_construct_event_info
.other_event
.parent_construct
;
306 enter_exit_data_event_info
.other_event
.implicit
= 1;
307 enter_exit_data_event_info
.other_event
.tool_info
= NULL
;
308 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
312 goacc_aq aq
= get_goacc_asyncqueue (async
);
314 tgt
= gomp_map_vars_async (acc_dev
, aq
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
,
315 true, GOMP_MAP_VARS_OPENACC
);
318 prof_info
.event_type
= acc_ev_enter_data_end
;
319 enter_exit_data_event_info
.other_event
.event_type
320 = prof_info
.event_type
;
321 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
325 devaddrs
= gomp_alloca (sizeof (void *) * mapnum
);
326 for (i
= 0; i
< mapnum
; i
++)
327 if (tgt
->list
[i
].key
!= NULL
)
328 devaddrs
[i
] = (void *) (tgt
->list
[i
].key
->tgt
->tgt_start
329 + tgt
->list
[i
].key
->tgt_offset
330 + tgt
->list
[i
].offset
);
334 acc_dev
->openacc
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
, dims
,
337 acc_dev
->openacc
.async
.exec_func (tgt_fn
, mapnum
, hostaddrs
, devaddrs
,
342 prof_info
.event_type
= acc_ev_exit_data_start
;
343 enter_exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
344 enter_exit_data_event_info
.other_event
.tool_info
= NULL
;
345 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
349 /* If running synchronously, unmap immediately. */
351 gomp_unmap_vars (tgt
, true);
353 gomp_unmap_vars_async (tgt
, true, aq
);
357 prof_info
.event_type
= acc_ev_exit_data_end
;
358 enter_exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
359 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
366 prof_info
.event_type
= acc_ev_compute_construct_end
;
367 compute_construct_event_info
.other_event
.event_type
368 = prof_info
.event_type
;
369 goacc_profiling_dispatch (&prof_info
, &compute_construct_event_info
,
372 thr
->prof_info
= NULL
;
373 thr
->api_info
= NULL
;
377 /* Legacy entry point (GCC 5). Only provide host fallback execution. */
380 GOACC_parallel (int flags_m
, void (*fn
) (void *),
381 size_t mapnum
, void **hostaddrs
, size_t *sizes
,
382 unsigned short *kinds
,
383 int num_gangs
, int num_workers
, int vector_length
,
384 int async
, int num_waits
, ...)
386 goacc_save_and_set_bind (acc_device_host
);
388 goacc_restore_bind ();
392 GOACC_data_start (int flags_m
, size_t mapnum
,
393 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
395 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
397 struct target_mem_desc
*tgt
;
399 #ifdef HAVE_INTTYPES_H
400 gomp_debug (0, "%s: mapnum=%"PRIu64
", hostaddrs=%p, size=%p, kinds=%p\n",
401 __FUNCTION__
, (uint64_t) mapnum
, hostaddrs
, sizes
, kinds
);
403 gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
404 __FUNCTION__
, (unsigned long) mapnum
, hostaddrs
, sizes
, kinds
);
407 goacc_lazy_initialize ();
409 struct goacc_thread
*thr
= goacc_thread ();
410 struct gomp_device_descr
*acc_dev
= thr
->dev
;
412 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
414 acc_prof_info prof_info
;
417 thr
->prof_info
= &prof_info
;
419 prof_info
.event_type
= acc_ev_enter_data_start
;
420 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
421 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
422 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
423 prof_info
.device_number
= acc_dev
->target_id
;
424 prof_info
.thread_id
= -1;
425 prof_info
.async
= acc_async_sync
; /* Always synchronous. */
426 prof_info
.async_queue
= prof_info
.async
;
427 prof_info
.src_file
= NULL
;
428 prof_info
.func_name
= NULL
;
429 prof_info
.line_no
= -1;
430 prof_info
.end_line_no
= -1;
431 prof_info
.func_line_no
= -1;
432 prof_info
.func_end_line_no
= -1;
434 acc_event_info enter_data_event_info
;
437 enter_data_event_info
.other_event
.event_type
438 = prof_info
.event_type
;
439 enter_data_event_info
.other_event
.valid_bytes
440 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
441 enter_data_event_info
.other_event
.parent_construct
= acc_construct_data
;
442 for (int i
= 0; i
< mapnum
; ++i
)
443 if ((kinds
[i
] & 0xff) == GOMP_MAP_USE_DEVICE_PTR
)
445 /* If there is one such data mapping kind, then this is actually an
446 OpenACC 'host_data' construct. (GCC maps the OpenACC
447 'host_data' construct to the OpenACC 'data' construct.) Apart
448 from artificial test cases (such as an OpenACC 'host_data'
449 construct's (implicit) device initialization when there hasn't
450 been any device data set up before...), no meaningful events
451 can really be generated from OpenACC 'host_data' constructs,
452 though. */
453 enter_data_event_info
.other_event
.parent_construct
454 = acc_construct_host_data
;
457 enter_data_event_info
.other_event
.implicit
= 0;
458 enter_data_event_info
.other_event
.tool_info
= NULL
;
460 acc_api_info api_info
;
463 thr
->api_info
= &api_info
;
465 api_info
.device_api
= acc_device_api_none
;
466 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
467 api_info
.device_type
= prof_info
.device_type
;
468 api_info
.vendor
= -1;
469 api_info
.device_handle
= NULL
;
470 api_info
.context_handle
= NULL
;
471 api_info
.async_handle
= NULL
;
475 goacc_profiling_dispatch (&prof_info
, &enter_data_event_info
, &api_info
);
477 /* Host fallback or 'do nothing'. */
478 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
479 || (flags
& GOACC_FLAG_HOST_FALLBACK
))
481 prof_info
.device_type
= acc_device_host
;
482 api_info
.device_type
= prof_info
.device_type
;
483 tgt
= gomp_map_vars (NULL
, 0, NULL
, NULL
, NULL
, NULL
, true,
484 GOMP_MAP_VARS_OPENACC
);
485 tgt
->prev
= thr
->mapped_data
;
486 thr
->mapped_data
= tgt
;
491 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
492 tgt
= gomp_map_vars (acc_dev
, mapnum
, hostaddrs
, NULL
, sizes
, kinds
, true,
493 GOMP_MAP_VARS_OPENACC
);
494 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
495 tgt
->prev
= thr
->mapped_data
;
496 thr
->mapped_data
= tgt
;
501 prof_info
.event_type
= acc_ev_enter_data_end
;
502 enter_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
503 goacc_profiling_dispatch (&prof_info
, &enter_data_event_info
, &api_info
);
505 thr
->prof_info
= NULL
;
506 thr
->api_info
= NULL
;
511 GOACC_data_end (void)
513 struct goacc_thread
*thr
= goacc_thread ();
514 struct gomp_device_descr
*acc_dev
= thr
->dev
;
515 struct target_mem_desc
*tgt
= thr
->mapped_data
;
517 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
519 acc_prof_info prof_info
;
522 thr
->prof_info
= &prof_info
;
524 prof_info
.event_type
= acc_ev_exit_data_start
;
525 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
526 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
527 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
528 prof_info
.device_number
= acc_dev
->target_id
;
529 prof_info
.thread_id
= -1;
530 prof_info
.async
= acc_async_sync
; /* Always synchronous. */
531 prof_info
.async_queue
= prof_info
.async
;
532 prof_info
.src_file
= NULL
;
533 prof_info
.func_name
= NULL
;
534 prof_info
.line_no
= -1;
535 prof_info
.end_line_no
= -1;
536 prof_info
.func_line_no
= -1;
537 prof_info
.func_end_line_no
= -1;
539 acc_event_info exit_data_event_info
;
542 exit_data_event_info
.other_event
.event_type
543 = prof_info
.event_type
;
544 exit_data_event_info
.other_event
.valid_bytes
545 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
546 exit_data_event_info
.other_event
.parent_construct
= acc_construct_data
;
547 exit_data_event_info
.other_event
.implicit
= 0;
548 exit_data_event_info
.other_event
.tool_info
= NULL
;
550 acc_api_info api_info
;
553 thr
->api_info
= &api_info
;
555 api_info
.device_api
= acc_device_api_none
;
556 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
557 api_info
.device_type
= prof_info
.device_type
;
558 api_info
.vendor
= -1;
559 api_info
.device_handle
= NULL
;
560 api_info
.context_handle
= NULL
;
561 api_info
.async_handle
= NULL
;
565 goacc_profiling_dispatch (&prof_info
, &exit_data_event_info
, &api_info
);
567 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
568 thr
->mapped_data
= tgt
->prev
;
569 gomp_unmap_vars (tgt
, true);
570 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);
574 prof_info
.event_type
= acc_ev_exit_data_end
;
575 exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
576 goacc_profiling_dispatch (&prof_info
, &exit_data_event_info
, &api_info
);
578 thr
->prof_info
= NULL
;
579 thr
->api_info
= NULL
;
584 GOACC_enter_exit_data (int flags_m
, size_t mapnum
,
585 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
586 int async
, int num_waits
, ...)
588 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
590 struct goacc_thread
*thr
;
591 struct gomp_device_descr
*acc_dev
;
592 bool data_enter
= false;
595 goacc_lazy_initialize ();
597 thr
= goacc_thread ();
600 /* Determine whether "finalize" semantics apply to all mappings of this
601 OpenACC directive. */
602 bool finalize
= false;
605 unsigned char kind
= kinds
[0] & 0xff;
606 if (kind
== GOMP_MAP_DELETE
607 || kind
== GOMP_MAP_FORCE_FROM
)
611 /* Determine if this is an "acc enter data". */
612 for (i
= 0; i
< mapnum
; ++i
)
614 unsigned char kind
= kinds
[i
] & 0xff;
616 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
619 if (kind
== GOMP_MAP_FORCE_ALLOC
620 || kind
== GOMP_MAP_FORCE_PRESENT
621 || kind
== GOMP_MAP_FORCE_TO
622 || kind
== GOMP_MAP_TO
623 || kind
== GOMP_MAP_ALLOC
)
629 if (kind
== GOMP_MAP_RELEASE
630 || kind
== GOMP_MAP_DELETE
631 || kind
== GOMP_MAP_FROM
632 || kind
== GOMP_MAP_FORCE_FROM
)
635 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
639 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
641 acc_prof_info prof_info
;
644 thr
->prof_info
= &prof_info
;
647 = data_enter
? acc_ev_enter_data_start
: acc_ev_exit_data_start
;
648 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
649 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
650 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
651 prof_info
.device_number
= acc_dev
->target_id
;
652 prof_info
.thread_id
= -1;
653 prof_info
.async
= async
;
654 prof_info
.async_queue
= prof_info
.async
;
655 prof_info
.src_file
= NULL
;
656 prof_info
.func_name
= NULL
;
657 prof_info
.line_no
= -1;
658 prof_info
.end_line_no
= -1;
659 prof_info
.func_line_no
= -1;
660 prof_info
.func_end_line_no
= -1;
662 acc_event_info enter_exit_data_event_info
;
665 enter_exit_data_event_info
.other_event
.event_type
666 = prof_info
.event_type
;
667 enter_exit_data_event_info
.other_event
.valid_bytes
668 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
669 enter_exit_data_event_info
.other_event
.parent_construct
670 = data_enter
? acc_construct_enter_data
: acc_construct_exit_data
;
671 enter_exit_data_event_info
.other_event
.implicit
= 0;
672 enter_exit_data_event_info
.other_event
.tool_info
= NULL
;
674 acc_api_info api_info
;
677 thr
->api_info
= &api_info
;
679 api_info
.device_api
= acc_device_api_none
;
680 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
681 api_info
.device_type
= prof_info
.device_type
;
682 api_info
.vendor
= -1;
683 api_info
.device_handle
= NULL
;
684 api_info
.context_handle
= NULL
;
685 api_info
.async_handle
= NULL
;
689 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
692 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
693 || (flags
& GOACC_FLAG_HOST_FALLBACK
))
695 prof_info
.device_type
= acc_device_host
;
696 api_info
.device_type
= prof_info
.device_type
;
705 va_start (ap
, num_waits
);
706 goacc_wait (async
, num_waits
, &ap
);
710 /* In C, non-pointers and arrays are represented by a single data clause.
711 Dynamically allocated arrays and subarrays are represented by a data
712 clause followed by an internal GOMP_MAP_POINTER.
714 In Fortran, scalars and arrays that are not allocated are represented by a
715 single data clause. Allocated arrays and subarrays have three mappings:
716 1) the original data clause, 2) a PSET, and 3) a pointer to the array data. */
721 for (i
= 0; i
< mapnum
; i
++)
723 unsigned char kind
= kinds
[i
] & 0xff;
725 /* Scan for pointers and PSETs. */
726 int pointer
= find_pointer (i
, mapnum
, kinds
);
733 case GOMP_MAP_FORCE_ALLOC
:
734 acc_create_async (hostaddrs
[i
], sizes
[i
], async
);
737 case GOMP_MAP_FORCE_TO
:
738 acc_copyin_async (hostaddrs
[i
], sizes
[i
], async
);
741 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
748 gomp_acc_insert_pointer (pointer
, &hostaddrs
[i
],
749 &sizes
[i
], &kinds
[i
], async
);
750 /* Increment 'i' by two because OpenACC requires Fortran
751 arrays to be contiguous, so each PSET is associated with
752 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESENT/MAP_FORCE_TO, and one MAP_POINTER. */
759 for (i
= 0; i
< mapnum
; ++i
)
761 unsigned char kind
= kinds
[i
] & 0xff;
763 int pointer
= find_pointer (i
, mapnum
, kinds
);
769 case GOMP_MAP_RELEASE
:
770 case GOMP_MAP_DELETE
:
771 if (acc_is_present (hostaddrs
[i
], sizes
[i
]))
774 acc_delete_finalize_async (hostaddrs
[i
], sizes
[i
], async
);
776 acc_delete_async (hostaddrs
[i
], sizes
[i
], async
);
780 case GOMP_MAP_FORCE_FROM
:
782 acc_copyout_finalize_async (hostaddrs
[i
], sizes
[i
], async
);
784 acc_copyout_async (hostaddrs
[i
], sizes
[i
], async
);
787 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
794 bool copyfrom
= (kind
== GOMP_MAP_FORCE_FROM
795 || kind
== GOMP_MAP_FROM
);
796 gomp_acc_remove_pointer (hostaddrs
[i
], sizes
[i
], copyfrom
, async
,
798 /* See the above comment. */
807 = data_enter
? acc_ev_enter_data_end
: acc_ev_exit_data_end
;
808 enter_exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
809 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
812 thr
->prof_info
= NULL
;
813 thr
->api_info
= NULL
;
818 goacc_wait (int async
, int num_waits
, va_list *ap
)
822 int qid
= va_arg (*ap
, int);
824 /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */
825 if (qid
== acc_async_noval
)
827 if (async
== acc_async_sync
)
830 acc_wait_all_async (async
);
834 if (acc_async_test (qid
))
837 if (async
== acc_async_sync
)
839 else if (qid
== async
)
840 /* If we're waiting on the same asynchronous queue as we're
841 launching on, the queue itself will order work as
842 required, so there's no need to wait explicitly. */
845 acc_wait_async (qid
, async
);
850 GOACC_update (int flags_m
, size_t mapnum
,
851 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
,
852 int async
, int num_waits
, ...)
854 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
858 goacc_lazy_initialize ();
860 struct goacc_thread
*thr
= goacc_thread ();
861 struct gomp_device_descr
*acc_dev
= thr
->dev
;
863 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
865 acc_prof_info prof_info
;
868 thr
->prof_info
= &prof_info
;
870 prof_info
.event_type
= acc_ev_update_start
;
871 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
872 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
873 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
874 prof_info
.device_number
= acc_dev
->target_id
;
875 prof_info
.thread_id
= -1;
876 prof_info
.async
= async
;
877 prof_info
.async_queue
= prof_info
.async
;
878 prof_info
.src_file
= NULL
;
879 prof_info
.func_name
= NULL
;
880 prof_info
.line_no
= -1;
881 prof_info
.end_line_no
= -1;
882 prof_info
.func_line_no
= -1;
883 prof_info
.func_end_line_no
= -1;
885 acc_event_info update_event_info
;
888 update_event_info
.other_event
.event_type
889 = prof_info
.event_type
;
890 update_event_info
.other_event
.valid_bytes
891 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
892 update_event_info
.other_event
.parent_construct
= acc_construct_update
;
893 update_event_info
.other_event
.implicit
= 0;
894 update_event_info
.other_event
.tool_info
= NULL
;
896 acc_api_info api_info
;
899 thr
->api_info
= &api_info
;
901 api_info
.device_api
= acc_device_api_none
;
902 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
903 api_info
.device_type
= prof_info
.device_type
;
904 api_info
.vendor
= -1;
905 api_info
.device_handle
= NULL
;
906 api_info
.context_handle
= NULL
;
907 api_info
.async_handle
= NULL
;
911 goacc_profiling_dispatch (&prof_info
, &update_event_info
, &api_info
);
913 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
914 || (flags
& GOACC_FLAG_HOST_FALLBACK
))
916 prof_info
.device_type
= acc_device_host
;
917 api_info
.device_type
= prof_info
.device_type
;
926 va_start (ap
, num_waits
);
927 goacc_wait (async
, num_waits
, &ap
);
931 bool update_device
= false;
932 for (i
= 0; i
< mapnum
; ++i
)
934 unsigned char kind
= kinds
[i
] & 0xff;
938 case GOMP_MAP_POINTER
:
939 case GOMP_MAP_TO_PSET
:
942 case GOMP_MAP_ALWAYS_POINTER
:
945 /* Save the contents of the host pointer. */
946 void *dptr
= acc_deviceptr (hostaddrs
[i
-1]);
947 uintptr_t t
= *(uintptr_t *) hostaddrs
[i
];
949 /* Update the contents of the host pointer to reflect
950 the device address of the preceding entry, hostaddrs[i-1]. */
952 *(uintptr_t *) hostaddrs
[i
] = (uintptr_t)dptr
;
953 /* TODO: verify that we really cannot use acc_update_device_async here. */
955 acc_update_device (hostaddrs
[i
], sizeof (uintptr_t));
957 /* Restore the host pointer. */
958 *(uintptr_t *) hostaddrs
[i
] = t
;
959 update_device
= false;
964 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
966 update_device
= false;
970 case GOMP_MAP_FORCE_TO
:
971 update_device
= true;
972 acc_update_device_async (hostaddrs
[i
], sizes
[i
], async
);
976 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
978 update_device
= false;
982 case GOMP_MAP_FORCE_FROM
:
983 update_device
= false;
984 acc_update_self_async (hostaddrs
[i
], sizes
[i
], async
);
988 gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind
);
996 prof_info
.event_type
= acc_ev_update_end
;
997 update_event_info
.other_event
.event_type
= prof_info
.event_type
;
998 goacc_profiling_dispatch (&prof_info
, &update_event_info
, &api_info
);
1000 thr
->prof_info
= NULL
;
1001 thr
->api_info
= NULL
;
1006 GOACC_wait (int async
, int num_waits
, ...)
1008 goacc_lazy_initialize ();
1010 struct goacc_thread
*thr
= goacc_thread ();
1013 assert (thr
->prof_info
== NULL
);
1014 assert (thr
->api_info
== NULL
);
1015 acc_prof_info prof_info
;
1016 acc_api_info api_info
;
1017 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
1020 prof_info
.async
= async
;
1021 prof_info
.async_queue
= prof_info
.async
;
1028 va_start (ap
, num_waits
);
1029 goacc_wait (async
, num_waits
, &ap
);
1032 else if (async
== acc_async_sync
)
1035 acc_wait_all_async (async
);
1039 thr
->prof_info
= NULL
;
1040 thr
->api_info
= NULL
;
1044 /* Legacy entry point (GCC 5). */
1047 GOACC_get_num_threads (void)
1052 /* Legacy entry point (GCC 5). */
1055 GOACC_get_thread_num (void)
1061 GOACC_declare (int flags_m
, size_t mapnum
,
1062 void **hostaddrs
, size_t *sizes
, unsigned short *kinds
)
1066 for (i
= 0; i
< mapnum
; i
++)
1068 unsigned char kind
= kinds
[i
] & 0xff;
1070 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
1075 case GOMP_MAP_FORCE_ALLOC
:
1076 case GOMP_MAP_FORCE_FROM
:
1077 case GOMP_MAP_FORCE_TO
:
1078 case GOMP_MAP_POINTER
:
1079 case GOMP_MAP_RELEASE
:
1080 case GOMP_MAP_DELETE
:
1081 GOACC_enter_exit_data (flags_m
, 1, &hostaddrs
[i
], &sizes
[i
],
1082 &kinds
[i
], GOMP_ASYNC_SYNC
, 0);
1085 case GOMP_MAP_FORCE_DEVICEPTR
:
1088 case GOMP_MAP_ALLOC
:
1089 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
1090 GOACC_enter_exit_data (flags_m
, 1, &hostaddrs
[i
], &sizes
[i
],
1091 &kinds
[i
], GOMP_ASYNC_SYNC
, 0);
1095 GOACC_enter_exit_data (flags_m
, 1, &hostaddrs
[i
], &sizes
[i
],
1096 &kinds
[i
], GOMP_ASYNC_SYNC
, 0);
1101 GOACC_enter_exit_data (flags_m
, 1, &hostaddrs
[i
], &sizes
[i
],
1102 &kinds
[i
], GOMP_ASYNC_SYNC
, 0);
1105 case GOMP_MAP_FORCE_PRESENT
:
1106 if (!acc_is_present (hostaddrs
[i
], sizes
[i
]))
1107 gomp_fatal ("[%p,%ld] is not mapped", hostaddrs
[i
],
1108 (unsigned long) sizes
[i
]);