1 /* OpenACC Runtime initialization routines
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
5 Contributed by Mentor Embedded.
7 This file is part of the GNU Offloading and Multi Processing Library
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
31 #include "gomp-constants.h"
36 /* Return block containing [H->S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
40 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
42 struct splay_tree_key_s node
;
45 node
.host_start
= (uintptr_t) h
;
46 node
.host_end
= (uintptr_t) h
+ s
;
48 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
53 /* Helper for lookup_dev. Iterate over splay tree. */
56 lookup_dev_1 (splay_tree_node node
, uintptr_t d
, size_t s
)
58 splay_tree_key key
= &node
->key
;
59 if (d
>= key
->tgt
->tgt_start
&& d
+ s
<= key
->tgt
->tgt_end
)
64 key
= lookup_dev_1 (node
->left
, d
, s
);
65 if (!key
&& node
->right
)
66 key
= lookup_dev_1 (node
->right
, d
, s
);
71 /* Return block containing [D->S), or NULL if not contained.
73 This iterates over the splay tree. This is not expected to be a common
76 The device lock associated with MEM_MAP must be locked on entry, and remains
80 lookup_dev (splay_tree mem_map
, void *d
, size_t s
)
82 if (!mem_map
|| !mem_map
->root
)
85 return lookup_dev_1 (mem_map
->root
, (uintptr_t) d
, s
);
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
98 goacc_lazy_initialize ();
100 struct goacc_thread
*thr
= goacc_thread ();
104 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
107 acc_prof_info prof_info
;
108 acc_api_info api_info
;
109 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
111 void *res
= thr
->dev
->alloc_func (thr
->dev
->target_id
, s
);
115 thr
->prof_info
= NULL
;
116 thr
->api_info
= NULL
;
130 struct goacc_thread
*thr
= goacc_thread ();
132 assert (thr
&& thr
->dev
);
134 struct gomp_device_descr
*acc_dev
= thr
->dev
;
136 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
139 acc_prof_info prof_info
;
140 acc_api_info api_info
;
141 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
143 gomp_mutex_lock (&acc_dev
->lock
);
145 /* We don't have to call lazy open here, as the ptr value must have
146 been returned by acc_malloc. It's not permitted to pass NULL in
147 (unless you got that null from acc_malloc). */
148 if ((k
= lookup_dev (&acc_dev
->mem_map
, d
, 1)))
150 void *offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
151 void *h
= k
->host_start
+ offset
;
152 size_t h_size
= k
->host_end
- k
->host_start
;
153 gomp_mutex_unlock (&acc_dev
->lock
);
154 /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
155 used in a mapping". */
156 gomp_fatal ("refusing to free device memory space at %p that is still"
157 " mapped at [%p,+%d]",
161 gomp_mutex_unlock (&acc_dev
->lock
);
163 if (!acc_dev
->free_func (acc_dev
->target_id
, d
))
164 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__
);
168 thr
->prof_info
= NULL
;
169 thr
->api_info
= NULL
;
174 memcpy_tofrom_device (bool from
, void *d
, void *h
, size_t s
, int async
,
175 const char *libfnname
)
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread
*thr
= goacc_thread ();
181 assert (thr
&& thr
->dev
);
183 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
192 acc_prof_info prof_info
;
193 acc_api_info api_info
;
194 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
197 prof_info
.async
= async
;
198 prof_info
.async_queue
= prof_info
.async
;
201 goacc_aq aq
= get_goacc_asyncqueue (async
);
203 gomp_copy_dev2host (thr
->dev
, aq
, h
, d
, s
);
205 gomp_copy_host2dev (thr
->dev
, aq
, d
, h
, s
, /* TODO: cbuf? */ NULL
);
209 thr
->prof_info
= NULL
;
210 thr
->api_info
= NULL
;
215 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
217 memcpy_tofrom_device (false, d
, h
, s
, acc_async_sync
, __FUNCTION__
);
221 acc_memcpy_to_device_async (void *d
, void *h
, size_t s
, int async
)
223 memcpy_tofrom_device (false, d
, h
, s
, async
, __FUNCTION__
);
227 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
229 memcpy_tofrom_device (true, d
, h
, s
, acc_async_sync
, __FUNCTION__
);
233 acc_memcpy_from_device_async (void *h
, void *d
, size_t s
, int async
)
235 memcpy_tofrom_device (true, d
, h
, s
, async
, __FUNCTION__
);
238 /* Return the device pointer that corresponds to host data H. Or NULL
242 acc_deviceptr (void *h
)
248 goacc_lazy_initialize ();
250 struct goacc_thread
*thr
= goacc_thread ();
251 struct gomp_device_descr
*dev
= thr
->dev
;
253 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
256 /* In the following, no OpenACC Profiling Interface events can possibly be
259 gomp_mutex_lock (&dev
->lock
);
261 n
= lookup_host (dev
, h
, 1);
265 gomp_mutex_unlock (&dev
->lock
);
269 offset
= h
- n
->host_start
;
271 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
273 gomp_mutex_unlock (&dev
->lock
);
278 /* Return the host pointer that corresponds to device data D. Or NULL
282 acc_hostptr (void *d
)
288 goacc_lazy_initialize ();
290 struct goacc_thread
*thr
= goacc_thread ();
291 struct gomp_device_descr
*acc_dev
= thr
->dev
;
293 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
296 /* In the following, no OpenACC Profiling Interface events can possibly be
299 gomp_mutex_lock (&acc_dev
->lock
);
301 n
= lookup_dev (&acc_dev
->mem_map
, d
, 1);
305 gomp_mutex_unlock (&acc_dev
->lock
);
309 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
311 h
= n
->host_start
+ offset
;
313 gomp_mutex_unlock (&acc_dev
->lock
);
318 /* Return 1 if host data [H,+S] is present on the device. */
321 acc_is_present (void *h
, size_t s
)
328 goacc_lazy_initialize ();
330 struct goacc_thread
*thr
= goacc_thread ();
331 struct gomp_device_descr
*acc_dev
= thr
->dev
;
333 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
336 /* In the following, no OpenACC Profiling Interface events can possibly be
339 gomp_mutex_lock (&acc_dev
->lock
);
341 n
= lookup_host (acc_dev
, h
, s
);
343 if (n
&& ((uintptr_t)h
< n
->host_start
344 || (uintptr_t)h
+ s
> n
->host_end
345 || s
> n
->host_end
- n
->host_start
))
348 gomp_mutex_unlock (&acc_dev
->lock
);
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
356 acc_map_data (void *h
, void *d
, size_t s
)
358 struct target_mem_desc
*tgt
= NULL
;
363 unsigned short kinds
= GOMP_MAP_ALLOC
;
365 goacc_lazy_initialize ();
367 struct goacc_thread
*thr
= goacc_thread ();
368 struct gomp_device_descr
*acc_dev
= thr
->dev
;
370 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
373 gomp_fatal ("cannot map data on shared-memory system");
377 struct goacc_thread
*thr
= goacc_thread ();
380 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
381 (void *)h
, (int)s
, (void *)d
, (int)s
);
383 acc_prof_info prof_info
;
384 acc_api_info api_info
;
385 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
387 gomp_mutex_lock (&acc_dev
->lock
);
389 if (lookup_host (acc_dev
, h
, s
))
391 gomp_mutex_unlock (&acc_dev
->lock
);
392 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
396 if (lookup_dev (&thr
->dev
->mem_map
, d
, s
))
398 gomp_mutex_unlock (&acc_dev
->lock
);
399 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
403 gomp_mutex_unlock (&acc_dev
->lock
);
405 tgt
= gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
406 &kinds
, true, GOMP_MAP_VARS_OPENACC
);
407 splay_tree_key n
= tgt
->list
[0].key
;
408 assert (n
->refcount
== 1);
409 assert (n
->dynamic_refcount
== 0);
410 /* Special reference counting behavior. */
411 n
->refcount
= REFCOUNT_INFINITY
;
415 thr
->prof_info
= NULL
;
416 thr
->api_info
= NULL
;
422 acc_unmap_data (void *h
)
424 struct goacc_thread
*thr
= goacc_thread ();
425 struct gomp_device_descr
*acc_dev
= thr
->dev
;
427 /* No need to call lazy open, as the address must have been mapped. */
429 /* This is a no-op on shared-memory targets. */
430 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
433 acc_prof_info prof_info
;
434 acc_api_info api_info
;
435 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
439 gomp_mutex_lock (&acc_dev
->lock
);
441 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
442 struct target_mem_desc
*t
;
446 gomp_mutex_unlock (&acc_dev
->lock
);
447 gomp_fatal ("%p is not a mapped block", (void *)h
);
450 host_size
= n
->host_end
- n
->host_start
;
452 if (n
->host_start
!= (uintptr_t) h
)
454 gomp_mutex_unlock (&acc_dev
->lock
);
455 gomp_fatal ("[%p,%d] surrounds %p",
456 (void *) n
->host_start
, (int) host_size
, (void *) h
);
458 /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
459 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating
460 the different 'REFCOUNT_INFINITY' cases, or simply separate
461 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
463 else if (n
->refcount
!= REFCOUNT_INFINITY
)
465 gomp_mutex_unlock (&acc_dev
->lock
);
466 gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
467 " by 'acc_map_data'",
468 (void *) h
, (int) host_size
);
471 /* Mark for removal. */
476 if (t
->refcount
== 2)
478 /* This is the last reference, so pull the descriptor off the
479 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
480 freeing the device memory. */
485 gomp_mutex_unlock (&acc_dev
->lock
);
487 gomp_unmap_vars (t
, true);
491 thr
->prof_info
= NULL
;
492 thr
->api_info
= NULL
;
496 #define FLAG_PRESENT (1 << 0)
497 #define FLAG_CREATE (1 << 1)
498 #define FLAG_COPY (1 << 2)
501 present_create_copy (unsigned f
, void *h
, size_t s
, int async
)
507 gomp_fatal ("[%p,+%d] is a bad range", (void *)h
, (int)s
);
509 goacc_lazy_initialize ();
511 struct goacc_thread
*thr
= goacc_thread ();
512 struct gomp_device_descr
*acc_dev
= thr
->dev
;
514 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
517 acc_prof_info prof_info
;
518 acc_api_info api_info
;
519 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
522 prof_info
.async
= async
;
523 prof_info
.async_queue
= prof_info
.async
;
526 gomp_mutex_lock (&acc_dev
->lock
);
528 n
= lookup_host (acc_dev
, h
, s
);
532 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ h
- n
->host_start
);
534 if (!(f
& FLAG_PRESENT
))
536 gomp_mutex_unlock (&acc_dev
->lock
);
537 gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
538 (void *)h
, (int)s
, (void *)d
, (int)s
);
540 if ((h
+ s
) > (void *)n
->host_end
)
542 gomp_mutex_unlock (&acc_dev
->lock
);
543 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
546 if (n
->refcount
!= REFCOUNT_INFINITY
)
549 n
->dynamic_refcount
++;
551 gomp_mutex_unlock (&acc_dev
->lock
);
553 else if (!(f
& FLAG_CREATE
))
555 gomp_mutex_unlock (&acc_dev
->lock
);
556 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
560 struct target_mem_desc
*tgt
;
562 unsigned short kinds
;
568 kinds
= GOMP_MAP_ALLOC
;
570 gomp_mutex_unlock (&acc_dev
->lock
);
572 goacc_aq aq
= get_goacc_asyncqueue (async
);
574 tgt
= gomp_map_vars_async (acc_dev
, aq
, mapnum
, &hostaddrs
, NULL
, &s
,
575 &kinds
, true, GOMP_MAP_VARS_OPENACC
);
576 /* Initialize dynamic refcount. */
577 tgt
->list
[0].key
->dynamic_refcount
= 1;
584 thr
->prof_info
= NULL
;
585 thr
->api_info
= NULL
;
592 acc_create (void *h
, size_t s
)
594 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
, acc_async_sync
);
598 acc_create_async (void *h
, size_t s
, int async
)
600 present_create_copy (FLAG_PRESENT
| FLAG_CREATE
, h
, s
, async
);
603 /* acc_present_or_create used to be what acc_create is now. */
604 /* acc_pcreate is acc_present_or_create by a different name. */
605 #ifdef HAVE_ATTRIBUTE_ALIAS
606 strong_alias (acc_create
, acc_present_or_create
)
607 strong_alias (acc_create
, acc_pcreate
)
610 acc_present_or_create (void *h
, size_t s
)
612 return acc_create (h
, s
);
616 acc_pcreate (void *h
, size_t s
)
618 return acc_create (h
, s
);
623 acc_copyin (void *h
, size_t s
)
625 return present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
,
630 acc_copyin_async (void *h
, size_t s
, int async
)
632 present_create_copy (FLAG_PRESENT
| FLAG_CREATE
| FLAG_COPY
, h
, s
, async
);
635 /* acc_present_or_copyin used to be what acc_copyin is now. */
636 /* acc_pcopyin is acc_present_or_copyin by a different name. */
637 #ifdef HAVE_ATTRIBUTE_ALIAS
638 strong_alias (acc_copyin
, acc_present_or_copyin
)
639 strong_alias (acc_copyin
, acc_pcopyin
)
642 acc_present_or_copyin (void *h
, size_t s
)
644 return acc_copyin (h
, s
);
648 acc_pcopyin (void *h
, size_t s
)
650 return acc_copyin (h
, s
);
654 #define FLAG_COPYOUT (1 << 0)
655 #define FLAG_FINALIZE (1 << 1)
658 delete_copyout (unsigned f
, void *h
, size_t s
, int async
, const char *libfnname
)
662 struct goacc_thread
*thr
= goacc_thread ();
663 struct gomp_device_descr
*acc_dev
= thr
->dev
;
665 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
668 acc_prof_info prof_info
;
669 acc_api_info api_info
;
670 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
673 prof_info
.async
= async
;
674 prof_info
.async_queue
= prof_info
.async
;
677 gomp_mutex_lock (&acc_dev
->lock
);
679 n
= lookup_host (acc_dev
, h
, s
);
681 /* No need to call lazy open, as the data must already have been
686 gomp_mutex_unlock (&acc_dev
->lock
);
687 gomp_fatal ("[%p,%d] is not mapped", (void *)h
, (int)s
);
690 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
691 + (uintptr_t) h
- n
->host_start
);
693 if ((uintptr_t) h
< n
->host_start
|| (uintptr_t) h
+ s
> n
->host_end
)
695 size_t host_size
= n
->host_end
- n
->host_start
;
696 gomp_mutex_unlock (&acc_dev
->lock
);
697 gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
698 (void *) h
, (int) s
, (void *) n
->host_start
, (int) host_size
);
701 if (n
->refcount
== REFCOUNT_INFINITY
)
704 n
->dynamic_refcount
= 0;
706 if (n
->refcount
< n
->dynamic_refcount
)
708 gomp_mutex_unlock (&acc_dev
->lock
);
709 gomp_fatal ("Dynamic reference counting assert fail\n");
712 if (f
& FLAG_FINALIZE
)
714 n
->refcount
-= n
->dynamic_refcount
;
715 n
->dynamic_refcount
= 0;
717 else if (n
->dynamic_refcount
)
719 n
->dynamic_refcount
--;
723 if (n
->refcount
== 0)
725 if (f
& FLAG_COPYOUT
)
727 goacc_aq aq
= get_goacc_asyncqueue (async
);
728 gomp_copy_dev2host (acc_dev
, aq
, h
, d
, s
);
730 gomp_remove_var (acc_dev
, n
);
733 gomp_mutex_unlock (&acc_dev
->lock
);
737 thr
->prof_info
= NULL
;
738 thr
->api_info
= NULL
;
743 acc_delete (void *h
, size_t s
)
745 delete_copyout (0, h
, s
, acc_async_sync
, __FUNCTION__
);
749 acc_delete_async (void *h
, size_t s
, int async
)
751 delete_copyout (0, h
, s
, async
, __FUNCTION__
);
755 acc_delete_finalize (void *h
, size_t s
)
757 delete_copyout (FLAG_FINALIZE
, h
, s
, acc_async_sync
, __FUNCTION__
);
761 acc_delete_finalize_async (void *h
, size_t s
, int async
)
763 delete_copyout (FLAG_FINALIZE
, h
, s
, async
, __FUNCTION__
);
767 acc_copyout (void *h
, size_t s
)
769 delete_copyout (FLAG_COPYOUT
, h
, s
, acc_async_sync
, __FUNCTION__
);
773 acc_copyout_async (void *h
, size_t s
, int async
)
775 delete_copyout (FLAG_COPYOUT
, h
, s
, async
, __FUNCTION__
);
779 acc_copyout_finalize (void *h
, size_t s
)
781 delete_copyout (FLAG_COPYOUT
| FLAG_FINALIZE
, h
, s
, acc_async_sync
,
786 acc_copyout_finalize_async (void *h
, size_t s
, int async
)
788 delete_copyout (FLAG_COPYOUT
| FLAG_FINALIZE
, h
, s
, async
, __FUNCTION__
);
792 update_dev_host (int is_dev
, void *h
, size_t s
, int async
)
797 goacc_lazy_initialize ();
799 struct goacc_thread
*thr
= goacc_thread ();
800 struct gomp_device_descr
*acc_dev
= thr
->dev
;
802 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
805 /* Fortran optional arguments that are non-present result in a
806 NULL host address here. This can safely be ignored as it is
807 not possible to 'update' a non-present optional argument. */
811 acc_prof_info prof_info
;
812 acc_api_info api_info
;
813 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
816 prof_info
.async
= async
;
817 prof_info
.async_queue
= prof_info
.async
;
820 gomp_mutex_lock (&acc_dev
->lock
);
822 n
= lookup_host (acc_dev
, h
, s
);
826 gomp_mutex_unlock (&acc_dev
->lock
);
827 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
830 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
831 + (uintptr_t) h
- n
->host_start
);
833 goacc_aq aq
= get_goacc_asyncqueue (async
);
836 gomp_copy_host2dev (acc_dev
, aq
, d
, h
, s
, /* TODO: cbuf? */ NULL
);
838 gomp_copy_dev2host (acc_dev
, aq
, h
, d
, s
);
840 gomp_mutex_unlock (&acc_dev
->lock
);
844 thr
->prof_info
= NULL
;
845 thr
->api_info
= NULL
;
850 acc_update_device (void *h
, size_t s
)
852 update_dev_host (1, h
, s
, acc_async_sync
);
856 acc_update_device_async (void *h
, size_t s
, int async
)
858 update_dev_host (1, h
, s
, async
);
862 acc_update_self (void *h
, size_t s
)
864 update_dev_host (0, h
, s
, acc_async_sync
);
868 acc_update_self_async (void *h
, size_t s
, int async
)
870 update_dev_host (0, h
, s
, async
);
874 /* OpenACC 'enter data', 'exit data': 'GOACC_enter_exit_data' and its helper
877 /* Special handling for 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'.
879 Only the first mapping is considered in reference counting; the following
880 ones implicitly follow suit. */
883 goacc_insert_pointer (size_t mapnum
, void **hostaddrs
, size_t *sizes
,
884 void *kinds
, int async
)
886 struct target_mem_desc
*tgt
;
887 struct goacc_thread
*thr
= goacc_thread ();
888 struct gomp_device_descr
*acc_dev
= thr
->dev
;
890 if (*hostaddrs
== NULL
)
893 if (acc_is_present (*hostaddrs
, *sizes
))
896 gomp_mutex_lock (&acc_dev
->lock
);
897 n
= lookup_host (acc_dev
, *hostaddrs
, *sizes
);
898 gomp_mutex_unlock (&acc_dev
->lock
);
901 for (size_t i
= 0; i
< tgt
->list_count
; i
++)
902 if (tgt
->list
[i
].key
== n
)
904 for (size_t j
= 0; j
< mapnum
; j
++)
905 if (i
+ j
< tgt
->list_count
&& tgt
->list
[i
+ j
].key
)
907 tgt
->list
[i
+ j
].key
->refcount
++;
908 tgt
->list
[i
+ j
].key
->dynamic_refcount
++;
912 /* Should not reach here. */
913 gomp_fatal ("Dynamic refcount incrementing failed for pointer/pset");
916 gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__
);
917 goacc_aq aq
= get_goacc_asyncqueue (async
);
918 tgt
= gomp_map_vars_async (acc_dev
, aq
, mapnum
, hostaddrs
,
919 NULL
, sizes
, kinds
, true, GOMP_MAP_VARS_OPENACC
);
920 gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__
);
922 /* Initialize dynamic refcount. */
923 tgt
->list
[0].key
->dynamic_refcount
= 1;
927 goacc_remove_pointer (void *h
, size_t s
, bool force_copyfrom
, int async
,
928 int finalize
, int mapnum
)
930 struct goacc_thread
*thr
= goacc_thread ();
931 struct gomp_device_descr
*acc_dev
= thr
->dev
;
933 struct target_mem_desc
*t
;
934 int minrefs
= (mapnum
== 1) ? 2 : 3;
936 if (!acc_is_present (h
, s
))
939 gomp_mutex_lock (&acc_dev
->lock
);
941 n
= lookup_host (acc_dev
, h
, 1);
945 gomp_mutex_unlock (&acc_dev
->lock
);
946 gomp_fatal ("%p is not a mapped block", (void *)h
);
949 gomp_debug (0, " %s: restore mappings\n", __FUNCTION__
);
953 if (n
->refcount
< n
->dynamic_refcount
)
955 gomp_mutex_unlock (&acc_dev
->lock
);
956 gomp_fatal ("Dynamic reference counting assert fail\n");
961 n
->refcount
-= n
->dynamic_refcount
;
962 n
->dynamic_refcount
= 0;
964 else if (n
->dynamic_refcount
)
966 n
->dynamic_refcount
--;
970 gomp_mutex_unlock (&acc_dev
->lock
);
972 if (n
->refcount
== 0)
974 /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */
976 t
->refcount
= minrefs
;
977 for (size_t i
= 0; i
< t
->list_count
; i
++)
978 if (t
->list
[i
].key
== n
)
980 t
->list
[i
].copy_from
= force_copyfrom
? 1 : 0;
984 /* If running synchronously, unmap immediately. */
985 if (async
< acc_async_noval
)
986 gomp_unmap_vars (t
, true);
989 goacc_aq aq
= get_goacc_asyncqueue (async
);
990 gomp_unmap_vars_async (t
, true, aq
);
994 gomp_mutex_unlock (&acc_dev
->lock
);
996 gomp_debug (0, " %s: mappings restored\n", __FUNCTION__
);
999 /* Return the number of mappings associated with 'GOMP_MAP_TO_PSET' or
1000 'GOMP_MAP_POINTER'. */
1003 find_pointer (int pos
, size_t mapnum
, unsigned short *kinds
)
1005 if (pos
+ 1 >= mapnum
)
1008 unsigned char kind
= kinds
[pos
+1] & 0xff;
1010 if (kind
== GOMP_MAP_TO_PSET
)
1012 else if (kind
== GOMP_MAP_POINTER
)
1019 GOACC_enter_exit_data (int flags_m
, size_t mapnum
, void **hostaddrs
,
1020 size_t *sizes
, unsigned short *kinds
, int async
,
1023 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
1025 struct goacc_thread
*thr
;
1026 struct gomp_device_descr
*acc_dev
;
1027 bool data_enter
= false;
1030 goacc_lazy_initialize ();
1032 thr
= goacc_thread ();
1035 /* Determine whether "finalize" semantics apply to all mappings of this
1036 OpenACC directive. */
1037 bool finalize
= false;
1040 unsigned char kind
= kinds
[0] & 0xff;
1041 if (kind
== GOMP_MAP_DELETE
1042 || kind
== GOMP_MAP_FORCE_FROM
)
1046 /* Determine if this is an "acc enter data". */
1047 for (i
= 0; i
< mapnum
; ++i
)
1049 unsigned char kind
= kinds
[i
] & 0xff;
1051 if (kind
== GOMP_MAP_POINTER
|| kind
== GOMP_MAP_TO_PSET
)
1054 if (kind
== GOMP_MAP_FORCE_ALLOC
1055 || kind
== GOMP_MAP_FORCE_PRESENT
1056 || kind
== GOMP_MAP_FORCE_TO
1057 || kind
== GOMP_MAP_TO
1058 || kind
== GOMP_MAP_ALLOC
)
1064 if (kind
== GOMP_MAP_RELEASE
1065 || kind
== GOMP_MAP_DELETE
1066 || kind
== GOMP_MAP_FROM
1067 || kind
== GOMP_MAP_FORCE_FROM
)
1070 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1074 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
1076 acc_prof_info prof_info
;
1079 thr
->prof_info
= &prof_info
;
1081 prof_info
.event_type
1082 = data_enter
? acc_ev_enter_data_start
: acc_ev_exit_data_start
;
1083 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
1084 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
1085 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
1086 prof_info
.device_number
= acc_dev
->target_id
;
1087 prof_info
.thread_id
= -1;
1088 prof_info
.async
= async
;
1089 prof_info
.async_queue
= prof_info
.async
;
1090 prof_info
.src_file
= NULL
;
1091 prof_info
.func_name
= NULL
;
1092 prof_info
.line_no
= -1;
1093 prof_info
.end_line_no
= -1;
1094 prof_info
.func_line_no
= -1;
1095 prof_info
.func_end_line_no
= -1;
1097 acc_event_info enter_exit_data_event_info
;
1100 enter_exit_data_event_info
.other_event
.event_type
1101 = prof_info
.event_type
;
1102 enter_exit_data_event_info
.other_event
.valid_bytes
1103 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
1104 enter_exit_data_event_info
.other_event
.parent_construct
1105 = data_enter
? acc_construct_enter_data
: acc_construct_exit_data
;
1106 enter_exit_data_event_info
.other_event
.implicit
= 0;
1107 enter_exit_data_event_info
.other_event
.tool_info
= NULL
;
1109 acc_api_info api_info
;
1112 thr
->api_info
= &api_info
;
1114 api_info
.device_api
= acc_device_api_none
;
1115 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
1116 api_info
.device_type
= prof_info
.device_type
;
1117 api_info
.vendor
= -1;
1118 api_info
.device_handle
= NULL
;
1119 api_info
.context_handle
= NULL
;
1120 api_info
.async_handle
= NULL
;
1124 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
1127 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1128 || (flags
& GOACC_FLAG_HOST_FALLBACK
))
1130 prof_info
.device_type
= acc_device_host
;
1131 api_info
.device_type
= prof_info
.device_type
;
1140 va_start (ap
, num_waits
);
1141 goacc_wait (async
, num_waits
, &ap
);
1145 /* In C, non-pointers and arrays are represented by a single data clause.
1146 Dynamically allocated arrays and subarrays are represented by a data
1147 clause followed by an internal GOMP_MAP_POINTER.
1149 In Fortran, scalars and unallocated arrays are represented by a
1150 single data clause. Allocated arrays and subarrays have three mappings:
1151 1) the original data clause, 2) a PSET, and 3) a pointer to the array data.
1156 for (i
= 0; i
< mapnum
; i
++)
1158 unsigned char kind
= kinds
[i
] & 0xff;
1160 /* Scan for pointers and PSETs. */
1161 int pointer
= find_pointer (i
, mapnum
, kinds
);
1167 case GOMP_MAP_ALLOC
:
1168 case GOMP_MAP_FORCE_ALLOC
:
1169 acc_create_async (hostaddrs
[i
], sizes
[i
], async
);
1172 case GOMP_MAP_FORCE_TO
:
1173 acc_copyin_async (hostaddrs
[i
], sizes
[i
], async
);
1176 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1183 goacc_insert_pointer (pointer
, &hostaddrs
[i
], &sizes
[i
], &kinds
[i
],
1185 /* Increment 'i' by two because OpenACC requires Fortran
1186 arrays to be contiguous, so each PSET is associated with
1187 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESENT/MAP_FORCE_TO, and
1194 for (i
= 0; i
< mapnum
; ++i
)
1196 unsigned char kind
= kinds
[i
] & 0xff;
1198 int pointer
= find_pointer (i
, mapnum
, kinds
);
1204 case GOMP_MAP_RELEASE
:
1205 case GOMP_MAP_DELETE
:
1206 if (acc_is_present (hostaddrs
[i
], sizes
[i
]))
1209 acc_delete_finalize_async (hostaddrs
[i
], sizes
[i
], async
);
1211 acc_delete_async (hostaddrs
[i
], sizes
[i
], async
);
1215 case GOMP_MAP_FORCE_FROM
:
1217 acc_copyout_finalize_async (hostaddrs
[i
], sizes
[i
], async
);
1219 acc_copyout_async (hostaddrs
[i
], sizes
[i
], async
);
1222 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1229 bool copyfrom
= (kind
== GOMP_MAP_FORCE_FROM
1230 || kind
== GOMP_MAP_FROM
);
1231 goacc_remove_pointer (hostaddrs
[i
], sizes
[i
], copyfrom
, async
,
1233 /* See the above comment. */
1241 prof_info
.event_type
1242 = data_enter
? acc_ev_enter_data_end
: acc_ev_exit_data_end
;
1243 enter_exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
1244 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
1247 thr
->prof_info
= NULL
;
1248 thr
->api_info
= NULL
;