/* OpenACC Runtime initialization routines

   Copyright (C) 2013-2020 Free Software Foundation, Inc.

   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#include "openacc.h"
#include "config.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
#include <stdint.h>
#include <string.h>
#include <assert.h>
36 /* Return block containing [H->S), or NULL if not contained. The device lock
37 for DEV must be locked on entry, and remains locked on exit. */
40 lookup_host (struct gomp_device_descr
*dev
, void *h
, size_t s
)
42 struct splay_tree_key_s node
;
45 node
.host_start
= (uintptr_t) h
;
46 node
.host_end
= (uintptr_t) h
+ s
;
48 key
= splay_tree_lookup (&dev
->mem_map
, &node
);
53 /* Helper for lookup_dev. Iterate over splay tree. */
56 lookup_dev_1 (splay_tree_node node
, uintptr_t d
, size_t s
)
58 splay_tree_key key
= &node
->key
;
59 if (d
>= key
->tgt
->tgt_start
&& d
+ s
<= key
->tgt
->tgt_end
)
64 key
= lookup_dev_1 (node
->left
, d
, s
);
65 if (!key
&& node
->right
)
66 key
= lookup_dev_1 (node
->right
, d
, s
);
71 /* Return block containing [D->S), or NULL if not contained.
73 This iterates over the splay tree. This is not expected to be a common
76 The device lock associated with MEM_MAP must be locked on entry, and remains
80 lookup_dev (splay_tree mem_map
, void *d
, size_t s
)
82 if (!mem_map
|| !mem_map
->root
)
85 return lookup_dev_1 (mem_map
->root
, (uintptr_t) d
, s
);
89 /* OpenACC is silent on how memory exhaustion is indicated. We return
98 goacc_lazy_initialize ();
100 struct goacc_thread
*thr
= goacc_thread ();
104 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
107 acc_prof_info prof_info
;
108 acc_api_info api_info
;
109 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
111 void *res
= thr
->dev
->alloc_func (thr
->dev
->target_id
, s
);
115 thr
->prof_info
= NULL
;
116 thr
->api_info
= NULL
;
130 struct goacc_thread
*thr
= goacc_thread ();
132 assert (thr
&& thr
->dev
);
134 struct gomp_device_descr
*acc_dev
= thr
->dev
;
136 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
139 acc_prof_info prof_info
;
140 acc_api_info api_info
;
141 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
143 gomp_mutex_lock (&acc_dev
->lock
);
145 /* We don't have to call lazy open here, as the ptr value must have
146 been returned by acc_malloc. It's not permitted to pass NULL in
147 (unless you got that null from acc_malloc). */
148 if ((k
= lookup_dev (&acc_dev
->mem_map
, d
, 1)))
150 void *offset
= d
- k
->tgt
->tgt_start
+ k
->tgt_offset
;
151 void *h
= k
->host_start
+ offset
;
152 size_t h_size
= k
->host_end
- k
->host_start
;
153 gomp_mutex_unlock (&acc_dev
->lock
);
154 /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
155 used in a mapping". */
156 gomp_fatal ("refusing to free device memory space at %p that is still"
157 " mapped at [%p,+%d]",
161 gomp_mutex_unlock (&acc_dev
->lock
);
163 if (!acc_dev
->free_func (acc_dev
->target_id
, d
))
164 gomp_fatal ("error in freeing device memory in %s", __FUNCTION__
);
168 thr
->prof_info
= NULL
;
169 thr
->api_info
= NULL
;
174 memcpy_tofrom_device (bool from
, void *d
, void *h
, size_t s
, int async
,
175 const char *libfnname
)
177 /* No need to call lazy open here, as the device pointer must have
178 been obtained from a routine that did that. */
179 struct goacc_thread
*thr
= goacc_thread ();
181 assert (thr
&& thr
->dev
);
183 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
192 acc_prof_info prof_info
;
193 acc_api_info api_info
;
194 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
197 prof_info
.async
= async
;
198 prof_info
.async_queue
= prof_info
.async
;
201 goacc_aq aq
= get_goacc_asyncqueue (async
);
203 gomp_copy_dev2host (thr
->dev
, aq
, h
, d
, s
);
205 gomp_copy_host2dev (thr
->dev
, aq
, d
, h
, s
, /* TODO: cbuf? */ NULL
);
209 thr
->prof_info
= NULL
;
210 thr
->api_info
= NULL
;
215 acc_memcpy_to_device (void *d
, void *h
, size_t s
)
217 memcpy_tofrom_device (false, d
, h
, s
, acc_async_sync
, __FUNCTION__
);
221 acc_memcpy_to_device_async (void *d
, void *h
, size_t s
, int async
)
223 memcpy_tofrom_device (false, d
, h
, s
, async
, __FUNCTION__
);
227 acc_memcpy_from_device (void *h
, void *d
, size_t s
)
229 memcpy_tofrom_device (true, d
, h
, s
, acc_async_sync
, __FUNCTION__
);
233 acc_memcpy_from_device_async (void *h
, void *d
, size_t s
, int async
)
235 memcpy_tofrom_device (true, d
, h
, s
, async
, __FUNCTION__
);
238 /* Return the device pointer that corresponds to host data H. Or NULL
242 acc_deviceptr (void *h
)
248 goacc_lazy_initialize ();
250 struct goacc_thread
*thr
= goacc_thread ();
251 struct gomp_device_descr
*dev
= thr
->dev
;
253 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
256 /* In the following, no OpenACC Profiling Interface events can possibly be
259 gomp_mutex_lock (&dev
->lock
);
261 n
= lookup_host (dev
, h
, 1);
265 gomp_mutex_unlock (&dev
->lock
);
269 offset
= h
- n
->host_start
;
271 d
= n
->tgt
->tgt_start
+ n
->tgt_offset
+ offset
;
273 gomp_mutex_unlock (&dev
->lock
);
278 /* Return the host pointer that corresponds to device data D. Or NULL
282 acc_hostptr (void *d
)
288 goacc_lazy_initialize ();
290 struct goacc_thread
*thr
= goacc_thread ();
291 struct gomp_device_descr
*acc_dev
= thr
->dev
;
293 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
296 /* In the following, no OpenACC Profiling Interface events can possibly be
299 gomp_mutex_lock (&acc_dev
->lock
);
301 n
= lookup_dev (&acc_dev
->mem_map
, d
, 1);
305 gomp_mutex_unlock (&acc_dev
->lock
);
309 offset
= d
- n
->tgt
->tgt_start
+ n
->tgt_offset
;
311 h
= n
->host_start
+ offset
;
313 gomp_mutex_unlock (&acc_dev
->lock
);
318 /* Return 1 if host data [H,+S] is present on the device. */
321 acc_is_present (void *h
, size_t s
)
328 goacc_lazy_initialize ();
330 struct goacc_thread
*thr
= goacc_thread ();
331 struct gomp_device_descr
*acc_dev
= thr
->dev
;
333 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
336 /* In the following, no OpenACC Profiling Interface events can possibly be
339 gomp_mutex_lock (&acc_dev
->lock
);
341 n
= lookup_host (acc_dev
, h
, s
);
343 if (n
&& ((uintptr_t)h
< n
->host_start
344 || (uintptr_t)h
+ s
> n
->host_end
345 || s
> n
->host_end
- n
->host_start
))
348 gomp_mutex_unlock (&acc_dev
->lock
);
353 /* Create a mapping for host [H,+S] -> device [D,+S] */
356 acc_map_data (void *h
, void *d
, size_t s
)
362 unsigned short kinds
= GOMP_MAP_ALLOC
;
364 goacc_lazy_initialize ();
366 struct goacc_thread
*thr
= goacc_thread ();
367 struct gomp_device_descr
*acc_dev
= thr
->dev
;
369 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
372 gomp_fatal ("cannot map data on shared-memory system");
376 struct goacc_thread
*thr
= goacc_thread ();
379 gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
380 (void *)h
, (int)s
, (void *)d
, (int)s
);
382 acc_prof_info prof_info
;
383 acc_api_info api_info
;
384 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
386 gomp_mutex_lock (&acc_dev
->lock
);
388 if (lookup_host (acc_dev
, h
, s
))
390 gomp_mutex_unlock (&acc_dev
->lock
);
391 gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h
,
395 if (lookup_dev (&thr
->dev
->mem_map
, d
, s
))
397 gomp_mutex_unlock (&acc_dev
->lock
);
398 gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d
,
402 gomp_mutex_unlock (&acc_dev
->lock
);
404 struct target_mem_desc
*tgt
405 = gomp_map_vars (acc_dev
, mapnum
, &hostaddrs
, &devaddrs
, &sizes
,
406 &kinds
, true, GOMP_MAP_VARS_ENTER_DATA
);
408 assert (tgt
->list_count
== 1);
409 splay_tree_key n
= tgt
->list
[0].key
;
411 assert (n
->refcount
== 1);
412 assert (n
->virtual_refcount
== 0);
413 /* Special reference counting behavior. */
414 n
->refcount
= REFCOUNT_INFINITY
;
418 thr
->prof_info
= NULL
;
419 thr
->api_info
= NULL
;
425 acc_unmap_data (void *h
)
427 struct goacc_thread
*thr
= goacc_thread ();
428 struct gomp_device_descr
*acc_dev
= thr
->dev
;
430 /* No need to call lazy open, as the address must have been mapped. */
432 /* This is a no-op on shared-memory targets. */
433 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
436 acc_prof_info prof_info
;
437 acc_api_info api_info
;
438 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
440 gomp_mutex_lock (&acc_dev
->lock
);
442 splay_tree_key n
= lookup_host (acc_dev
, h
, 1);
446 gomp_mutex_unlock (&acc_dev
->lock
);
447 gomp_fatal ("%p is not a mapped block", (void *)h
);
450 size_t host_size
= n
->host_end
- n
->host_start
;
452 if (n
->host_start
!= (uintptr_t) h
)
454 gomp_mutex_unlock (&acc_dev
->lock
);
455 gomp_fatal ("[%p,%d] surrounds %p",
456 (void *) n
->host_start
, (int) host_size
, (void *) h
);
458 /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
459 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating
460 the different 'REFCOUNT_INFINITY' cases, or simply separate
461 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
463 else if (n
->refcount
!= REFCOUNT_INFINITY
)
465 gomp_mutex_unlock (&acc_dev
->lock
);
466 gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
467 " by 'acc_map_data'",
468 (void *) h
, (int) host_size
);
471 struct target_mem_desc
*tgt
= n
->tgt
;
473 if (tgt
->refcount
== REFCOUNT_INFINITY
)
475 gomp_mutex_unlock (&acc_dev
->lock
);
476 gomp_fatal ("cannot unmap target block");
479 /* Above, we've verified that the mapping must have been set up by
481 assert (tgt
->refcount
== 1);
483 /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var'
484 from freeing the target memory. */
488 bool is_tgt_unmapped
= gomp_remove_var (acc_dev
, n
);
489 assert (is_tgt_unmapped
);
491 gomp_mutex_unlock (&acc_dev
->lock
);
495 thr
->prof_info
= NULL
;
496 thr
->api_info
= NULL
;
501 /* Enter dynamic mapping for a single datum. Return the device pointer. */
504 goacc_enter_datum (void **hostaddrs
, size_t *sizes
, void *kinds
, int async
)
509 if (!hostaddrs
[0] || !sizes
[0])
510 gomp_fatal ("[%p,+%d] is a bad range", hostaddrs
[0], (int) sizes
[0]);
512 goacc_lazy_initialize ();
514 struct goacc_thread
*thr
= goacc_thread ();
515 struct gomp_device_descr
*acc_dev
= thr
->dev
;
517 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
520 acc_prof_info prof_info
;
521 acc_api_info api_info
;
522 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
525 prof_info
.async
= async
;
526 prof_info
.async_queue
= prof_info
.async
;
529 gomp_mutex_lock (&acc_dev
->lock
);
531 n
= lookup_host (acc_dev
, hostaddrs
[0], sizes
[0]);
534 void *h
= hostaddrs
[0];
538 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
+ h
- n
->host_start
);
540 if ((h
+ s
) > (void *)n
->host_end
)
542 gomp_mutex_unlock (&acc_dev
->lock
);
543 gomp_fatal ("[%p,+%d] not mapped", (void *)h
, (int)s
);
546 assert (n
->refcount
!= REFCOUNT_LINK
);
547 if (n
->refcount
!= REFCOUNT_INFINITY
)
550 n
->virtual_refcount
++;
553 gomp_mutex_unlock (&acc_dev
->lock
);
557 const size_t mapnum
= 1;
559 gomp_mutex_unlock (&acc_dev
->lock
);
561 goacc_aq aq
= get_goacc_asyncqueue (async
);
563 struct target_mem_desc
*tgt
564 = gomp_map_vars_async (acc_dev
, aq
, mapnum
, hostaddrs
, NULL
, sizes
,
565 kinds
, true, GOMP_MAP_VARS_OPENACC_ENTER_DATA
);
567 assert (tgt
->list_count
== 1);
568 n
= tgt
->list
[0].key
;
570 assert (n
->refcount
== 1);
571 assert (n
->virtual_refcount
== 0);
573 d
= (void *) tgt
->tgt_start
;
578 thr
->prof_info
= NULL
;
579 thr
->api_info
= NULL
;
586 acc_create (void *h
, size_t s
)
588 unsigned short kinds
[1] = { GOMP_MAP_ALLOC
};
589 return goacc_enter_datum (&h
, &s
, &kinds
, acc_async_sync
);
593 acc_create_async (void *h
, size_t s
, int async
)
595 unsigned short kinds
[1] = { GOMP_MAP_ALLOC
};
596 goacc_enter_datum (&h
, &s
, &kinds
, async
);
/* acc_present_or_create used to be what acc_create is now.  */
/* acc_pcreate is acc_present_or_create by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_create, acc_present_or_create)
strong_alias (acc_create, acc_pcreate)
#else
void *
acc_present_or_create (void *h, size_t s)
{
  return acc_create (h, s);
}

void *
acc_pcreate (void *h, size_t s)
{
  return acc_create (h, s);
}
#endif
619 acc_copyin (void *h
, size_t s
)
621 unsigned short kinds
[1] = { GOMP_MAP_TO
};
622 return goacc_enter_datum (&h
, &s
, &kinds
, acc_async_sync
);
626 acc_copyin_async (void *h
, size_t s
, int async
)
628 unsigned short kinds
[1] = { GOMP_MAP_TO
};
629 goacc_enter_datum (&h
, &s
, &kinds
, async
);
/* acc_present_or_copyin used to be what acc_copyin is now.  */
/* acc_pcopyin is acc_present_or_copyin by a different name.  */
#ifdef HAVE_ATTRIBUTE_ALIAS
strong_alias (acc_copyin, acc_present_or_copyin)
strong_alias (acc_copyin, acc_pcopyin)
#else
void *
acc_present_or_copyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}

void *
acc_pcopyin (void *h, size_t s)
{
  return acc_copyin (h, s);
}
#endif
652 /* Exit a dynamic mapping for a single variable. */
655 goacc_exit_datum (void *h
, size_t s
, unsigned short kind
, int async
)
657 /* No need to call lazy open, as the data must already have been
662 struct goacc_thread
*thr
= goacc_thread ();
663 struct gomp_device_descr
*acc_dev
= thr
->dev
;
665 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
668 acc_prof_info prof_info
;
669 acc_api_info api_info
;
670 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
673 prof_info
.async
= async
;
674 prof_info
.async_queue
= prof_info
.async
;
677 gomp_mutex_lock (&acc_dev
->lock
);
679 splay_tree_key n
= lookup_host (acc_dev
, h
, s
);
681 /* PR92726, RP92970, PR92984: no-op. */
684 if ((uintptr_t) h
< n
->host_start
|| (uintptr_t) h
+ s
> n
->host_end
)
686 size_t host_size
= n
->host_end
- n
->host_start
;
687 gomp_mutex_unlock (&acc_dev
->lock
);
688 gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
689 (void *) h
, (int) s
, (void *) n
->host_start
, (int) host_size
);
692 bool finalize
= (kind
== GOMP_MAP_DELETE
693 || kind
== GOMP_MAP_FORCE_FROM
);
696 if (n
->refcount
!= REFCOUNT_INFINITY
)
697 n
->refcount
-= n
->virtual_refcount
;
698 n
->virtual_refcount
= 0;
701 if (n
->virtual_refcount
> 0)
703 if (n
->refcount
!= REFCOUNT_INFINITY
)
705 n
->virtual_refcount
--;
707 else if (n
->refcount
> 0 && n
->refcount
!= REFCOUNT_INFINITY
)
710 if (n
->refcount
== 0)
712 goacc_aq aq
= get_goacc_asyncqueue (async
);
714 bool copyout
= (kind
== GOMP_MAP_FROM
715 || kind
== GOMP_MAP_FORCE_FROM
);
718 void *d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
719 + (uintptr_t) h
- n
->host_start
);
720 gomp_copy_dev2host (acc_dev
, aq
, h
, d
, s
);
724 /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
725 'gomp_unref_tgt' comment in
726 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
728 gomp_remove_var_async (acc_dev
, n
, aq
);
731 size_t num_mappings
= 0;
732 /* If the target_mem_desc represents a single data mapping, we can
733 check that it is freed when this splay tree key's refcount reaches
734 zero. Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
735 multiple members), fall back to skipping the test. */
736 for (size_t l_i
= 0; l_i
< n
->tgt
->list_count
; ++l_i
)
737 if (n
->tgt
->list
[l_i
].key
)
739 bool is_tgt_unmapped
= gomp_remove_var (acc_dev
, n
);
740 assert (is_tgt_unmapped
|| num_mappings
> 1);
745 gomp_mutex_unlock (&acc_dev
->lock
);
749 thr
->prof_info
= NULL
;
750 thr
->api_info
= NULL
;
755 acc_delete (void *h
, size_t s
)
757 goacc_exit_datum (h
, s
, GOMP_MAP_RELEASE
, acc_async_sync
);
761 acc_delete_async (void *h
, size_t s
, int async
)
763 goacc_exit_datum (h
, s
, GOMP_MAP_RELEASE
, async
);
767 acc_delete_finalize (void *h
, size_t s
)
769 goacc_exit_datum (h
, s
, GOMP_MAP_DELETE
, acc_async_sync
);
773 acc_delete_finalize_async (void *h
, size_t s
, int async
)
775 goacc_exit_datum (h
, s
, GOMP_MAP_DELETE
, async
);
779 acc_copyout (void *h
, size_t s
)
781 goacc_exit_datum (h
, s
, GOMP_MAP_FROM
, acc_async_sync
);
785 acc_copyout_async (void *h
, size_t s
, int async
)
787 goacc_exit_datum (h
, s
, GOMP_MAP_FROM
, async
);
791 acc_copyout_finalize (void *h
, size_t s
)
793 goacc_exit_datum (h
, s
, GOMP_MAP_FORCE_FROM
, acc_async_sync
);
797 acc_copyout_finalize_async (void *h
, size_t s
, int async
)
799 goacc_exit_datum (h
, s
, GOMP_MAP_FORCE_FROM
, async
);
803 update_dev_host (int is_dev
, void *h
, size_t s
, int async
)
808 goacc_lazy_initialize ();
810 struct goacc_thread
*thr
= goacc_thread ();
811 struct gomp_device_descr
*acc_dev
= thr
->dev
;
813 if (acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
816 /* Fortran optional arguments that are non-present result in a
817 NULL host address here. This can safely be ignored as it is
818 not possible to 'update' a non-present optional argument. */
822 acc_prof_info prof_info
;
823 acc_api_info api_info
;
824 bool profiling_p
= GOACC_PROFILING_SETUP_P (thr
, &prof_info
, &api_info
);
827 prof_info
.async
= async
;
828 prof_info
.async_queue
= prof_info
.async
;
831 gomp_mutex_lock (&acc_dev
->lock
);
833 n
= lookup_host (acc_dev
, h
, s
);
837 gomp_mutex_unlock (&acc_dev
->lock
);
838 gomp_fatal ("[%p,%d] is not mapped", h
, (int)s
);
841 d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
842 + (uintptr_t) h
- n
->host_start
);
844 goacc_aq aq
= get_goacc_asyncqueue (async
);
847 gomp_copy_host2dev (acc_dev
, aq
, d
, h
, s
, /* TODO: cbuf? */ NULL
);
849 gomp_copy_dev2host (acc_dev
, aq
, h
, d
, s
);
851 gomp_mutex_unlock (&acc_dev
->lock
);
855 thr
->prof_info
= NULL
;
856 thr
->api_info
= NULL
;
861 acc_update_device (void *h
, size_t s
)
863 update_dev_host (1, h
, s
, acc_async_sync
);
/* Asynchronous variant of acc_update_device.  */

void
acc_update_device_async (void *h, size_t s, int async)
{
  update_dev_host (1, h, s, async);
}
873 acc_update_self (void *h
, size_t s
)
875 update_dev_host (0, h
, s
, acc_async_sync
);
/* Asynchronous variant of acc_update_self.  */

void
acc_update_self_async (void *h, size_t s, int async)
{
  update_dev_host (0, h, s, async);
}
885 acc_attach_async (void **hostaddr
, int async
)
887 struct goacc_thread
*thr
= goacc_thread ();
888 struct gomp_device_descr
*acc_dev
= thr
->dev
;
889 goacc_aq aq
= get_goacc_asyncqueue (async
);
891 struct splay_tree_key_s cur_node
;
894 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
897 gomp_mutex_lock (&acc_dev
->lock
);
899 cur_node
.host_start
= (uintptr_t) hostaddr
;
900 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
901 n
= splay_tree_lookup (&acc_dev
->mem_map
, &cur_node
);
905 gomp_mutex_unlock (&acc_dev
->lock
);
906 gomp_fatal ("struct not mapped for acc_attach");
909 gomp_attach_pointer (acc_dev
, aq
, &acc_dev
->mem_map
, n
, (uintptr_t) hostaddr
,
912 gomp_mutex_unlock (&acc_dev
->lock
);
916 acc_attach (void **hostaddr
)
918 acc_attach_async (hostaddr
, acc_async_sync
);
922 goacc_detach_internal (void **hostaddr
, int async
, bool finalize
)
924 struct goacc_thread
*thr
= goacc_thread ();
925 struct gomp_device_descr
*acc_dev
= thr
->dev
;
926 struct splay_tree_key_s cur_node
;
928 struct goacc_asyncqueue
*aq
= get_goacc_asyncqueue (async
);
930 if (thr
->dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
933 gomp_mutex_lock (&acc_dev
->lock
);
935 cur_node
.host_start
= (uintptr_t) hostaddr
;
936 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
937 n
= splay_tree_lookup (&acc_dev
->mem_map
, &cur_node
);
941 gomp_mutex_unlock (&acc_dev
->lock
);
942 gomp_fatal ("struct not mapped for acc_detach");
945 gomp_detach_pointer (acc_dev
, aq
, n
, (uintptr_t) hostaddr
, finalize
, NULL
);
947 gomp_mutex_unlock (&acc_dev
->lock
);
951 acc_detach (void **hostaddr
)
953 goacc_detach_internal (hostaddr
, acc_async_sync
, false);
957 acc_detach_async (void **hostaddr
, int async
)
959 goacc_detach_internal (hostaddr
, async
, false);
963 acc_detach_finalize (void **hostaddr
)
965 goacc_detach_internal (hostaddr
, acc_async_sync
, true);
969 acc_detach_finalize_async (void **hostaddr
, int async
)
971 goacc_detach_internal (hostaddr
, async
, true);
974 /* Some types of (pointer) variables use several consecutive mappings, which
975 must be treated as a group for enter/exit data directives. This function
976 returns the last mapping in such a group (inclusive), or POS for singleton
980 find_group_last (int pos
, size_t mapnum
, size_t *sizes
, unsigned short *kinds
)
982 unsigned char kind0
= kinds
[pos
] & 0xff;
987 case GOMP_MAP_TO_PSET
:
989 && (kinds
[pos
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
992 while (pos
+ 1 < mapnum
993 && (kinds
[pos
+ 1] & 0xff) == GOMP_MAP_POINTER
)
995 /* We expect at least one GOMP_MAP_POINTER (if not a single
996 GOMP_MAP_ATTACH) after a GOMP_MAP_TO_PSET. */
997 assert (pos
> first_pos
);
1000 case GOMP_MAP_STRUCT
:
1004 case GOMP_MAP_POINTER
:
1005 case GOMP_MAP_ALWAYS_POINTER
:
1006 /* These mappings are only expected after some other mapping. If we
1007 see one by itself, something has gone wrong. */
1008 gomp_fatal ("unexpected mapping");
1011 case GOMP_MAP_ATTACH
:
1015 /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
1017 if (pos
+ 1 < mapnum
)
1019 unsigned char kind1
= kinds
[pos
+ 1] & 0xff;
1020 if (kind1
== GOMP_MAP_ALWAYS_POINTER
)
1024 /* We can have a single GOMP_MAP_ATTACH mapping after a to/from
1026 if (pos
+ 1 < mapnum
1027 && (kinds
[pos
+ 1] & 0xff) == GOMP_MAP_ATTACH
)
1030 /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
1032 while (pos
+ 1 < mapnum
1033 && (kinds
[pos
+ 1] & 0xff) == GOMP_MAP_POINTER
)
1040 /* Map variables for OpenACC "enter data". We can't just call
1041 gomp_map_vars_async once, because individual mapped variables might have
1042 "exit data" called for them at different times. */
1045 goacc_enter_data_internal (struct gomp_device_descr
*acc_dev
, size_t mapnum
,
1046 void **hostaddrs
, size_t *sizes
,
1047 unsigned short *kinds
, goacc_aq aq
)
1049 for (size_t i
= 0; i
< mapnum
; i
++)
1051 int group_last
= find_group_last (i
, mapnum
, sizes
, kinds
);
1053 gomp_map_vars_async (acc_dev
, aq
,
1054 (group_last
- i
) + 1,
1055 &hostaddrs
[i
], NULL
,
1056 &sizes
[i
], &kinds
[i
], true,
1057 GOMP_MAP_VARS_OPENACC_ENTER_DATA
);
1063 /* Unmap variables for OpenACC "exit data". */
1066 goacc_exit_data_internal (struct gomp_device_descr
*acc_dev
, size_t mapnum
,
1067 void **hostaddrs
, size_t *sizes
,
1068 unsigned short *kinds
, goacc_aq aq
)
1070 gomp_mutex_lock (&acc_dev
->lock
);
1072 /* Handle "detach" before copyback/deletion of mapped data. */
1073 for (size_t i
= 0; i
< mapnum
; ++i
)
1075 unsigned char kind
= kinds
[i
] & 0xff;
1076 bool finalize
= false;
1079 case GOMP_MAP_FORCE_DETACH
:
1083 case GOMP_MAP_DETACH
:
1085 struct splay_tree_key_s cur_node
;
1086 uintptr_t hostaddr
= (uintptr_t) hostaddrs
[i
];
1087 cur_node
.host_start
= hostaddr
;
1088 cur_node
.host_end
= cur_node
.host_start
+ sizeof (void *);
1090 = splay_tree_lookup (&acc_dev
->mem_map
, &cur_node
);
1094 gomp_mutex_unlock (&acc_dev
->lock
);
1095 gomp_fatal ("struct not mapped for detach operation");
1098 gomp_detach_pointer (acc_dev
, aq
, n
, hostaddr
, finalize
, NULL
);
1106 for (size_t i
= 0; i
< mapnum
; ++i
)
1108 unsigned char kind
= kinds
[i
] & 0xff;
1109 bool copyfrom
= false;
1110 bool finalize
= false;
1112 if (kind
== GOMP_MAP_FORCE_FROM
1113 || kind
== GOMP_MAP_DELETE
1114 || kind
== GOMP_MAP_FORCE_DETACH
)
1120 case GOMP_MAP_FORCE_FROM
:
1124 case GOMP_MAP_TO_PSET
:
1125 case GOMP_MAP_POINTER
:
1126 case GOMP_MAP_DELETE
:
1127 case GOMP_MAP_RELEASE
:
1128 case GOMP_MAP_DETACH
:
1129 case GOMP_MAP_FORCE_DETACH
:
1131 struct splay_tree_key_s cur_node
;
1133 if (kind
== GOMP_MAP_POINTER
1134 || kind
== GOMP_MAP_DETACH
1135 || kind
== GOMP_MAP_FORCE_DETACH
)
1136 size
= sizeof (void *);
1139 cur_node
.host_start
= (uintptr_t) hostaddrs
[i
];
1140 cur_node
.host_end
= cur_node
.host_start
+ size
;
1142 = splay_tree_lookup (&acc_dev
->mem_map
, &cur_node
);
1149 if (n
->refcount
!= REFCOUNT_INFINITY
)
1150 n
->refcount
-= n
->virtual_refcount
;
1151 n
->virtual_refcount
= 0;
1154 if (n
->virtual_refcount
> 0)
1156 if (n
->refcount
!= REFCOUNT_INFINITY
)
1158 n
->virtual_refcount
--;
1160 else if (n
->refcount
> 0 && n
->refcount
!= REFCOUNT_INFINITY
)
1163 if (n
->refcount
== 0)
1167 void *d
= (void *) (n
->tgt
->tgt_start
+ n
->tgt_offset
1168 + cur_node
.host_start
- n
->host_start
);
1169 gomp_copy_dev2host (acc_dev
, aq
,
1170 (void *) cur_node
.host_start
, d
,
1171 cur_node
.host_end
- cur_node
.host_start
);
1175 /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
1176 'gomp_unref_tgt' comment in
1177 <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
1179 gomp_remove_var_async (acc_dev
, n
, aq
);
1182 size_t num_mappings
= 0;
1183 /* If the target_mem_desc represents a single data mapping,
1184 we can check that it is freed when this splay tree key's
1185 refcount reaches zero. Otherwise (e.g. for a
1186 'GOMP_MAP_STRUCT' mapping with multiple members), fall
1187 back to skipping the test. */
1188 for (size_t l_i
= 0; l_i
< n
->tgt
->list_count
; ++l_i
)
1189 if (n
->tgt
->list
[l_i
].key
)
1191 bool is_tgt_unmapped
= gomp_remove_var (acc_dev
, n
);
1192 assert (is_tgt_unmapped
|| num_mappings
> 1);
1198 case GOMP_MAP_STRUCT
:
1199 /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing
1200 for all its entries. This special handling exists for GCC 10.1
1201 compatibility; afterwards, we're not generating these no-op
1202 'GOMP_MAP_STRUCT's anymore. */
1206 gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
1211 gomp_mutex_unlock (&acc_dev
->lock
);
1215 GOACC_enter_exit_data (int flags_m
, size_t mapnum
, void **hostaddrs
,
1216 size_t *sizes
, unsigned short *kinds
, int async
,
1219 int flags
= GOACC_FLAGS_UNMARSHAL (flags_m
);
1221 struct goacc_thread
*thr
;
1222 struct gomp_device_descr
*acc_dev
;
1223 bool data_enter
= false;
1226 goacc_lazy_initialize ();
1228 thr
= goacc_thread ();
1231 /* Determine if this is an "acc enter data". */
1232 for (i
= 0; i
< mapnum
; ++i
)
1234 unsigned char kind
= kinds
[i
] & 0xff;
1236 if (kind
== GOMP_MAP_POINTER
1237 || kind
== GOMP_MAP_TO_PSET
1238 || kind
== GOMP_MAP_STRUCT
)
1241 if (kind
== GOMP_MAP_FORCE_ALLOC
1242 || kind
== GOMP_MAP_FORCE_PRESENT
1243 || kind
== GOMP_MAP_ATTACH
1244 || kind
== GOMP_MAP_FORCE_TO
1245 || kind
== GOMP_MAP_TO
1246 || kind
== GOMP_MAP_ALLOC
)
1252 if (kind
== GOMP_MAP_RELEASE
1253 || kind
== GOMP_MAP_DELETE
1254 || kind
== GOMP_MAP_DETACH
1255 || kind
== GOMP_MAP_FORCE_DETACH
1256 || kind
== GOMP_MAP_FROM
1257 || kind
== GOMP_MAP_FORCE_FROM
)
1260 gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
1264 bool profiling_p
= GOACC_PROFILING_DISPATCH_P (true);
1266 acc_prof_info prof_info
;
1269 thr
->prof_info
= &prof_info
;
1271 prof_info
.event_type
1272 = data_enter
? acc_ev_enter_data_start
: acc_ev_exit_data_start
;
1273 prof_info
.valid_bytes
= _ACC_PROF_INFO_VALID_BYTES
;
1274 prof_info
.version
= _ACC_PROF_INFO_VERSION
;
1275 prof_info
.device_type
= acc_device_type (acc_dev
->type
);
1276 prof_info
.device_number
= acc_dev
->target_id
;
1277 prof_info
.thread_id
= -1;
1278 prof_info
.async
= async
;
1279 prof_info
.async_queue
= prof_info
.async
;
1280 prof_info
.src_file
= NULL
;
1281 prof_info
.func_name
= NULL
;
1282 prof_info
.line_no
= -1;
1283 prof_info
.end_line_no
= -1;
1284 prof_info
.func_line_no
= -1;
1285 prof_info
.func_end_line_no
= -1;
1287 acc_event_info enter_exit_data_event_info
;
1290 enter_exit_data_event_info
.other_event
.event_type
1291 = prof_info
.event_type
;
1292 enter_exit_data_event_info
.other_event
.valid_bytes
1293 = _ACC_OTHER_EVENT_INFO_VALID_BYTES
;
1294 enter_exit_data_event_info
.other_event
.parent_construct
1295 = data_enter
? acc_construct_enter_data
: acc_construct_exit_data
;
1296 enter_exit_data_event_info
.other_event
.implicit
= 0;
1297 enter_exit_data_event_info
.other_event
.tool_info
= NULL
;
1299 acc_api_info api_info
;
1302 thr
->api_info
= &api_info
;
1304 api_info
.device_api
= acc_device_api_none
;
1305 api_info
.valid_bytes
= _ACC_API_INFO_VALID_BYTES
;
1306 api_info
.device_type
= prof_info
.device_type
;
1307 api_info
.vendor
= -1;
1308 api_info
.device_handle
= NULL
;
1309 api_info
.context_handle
= NULL
;
1310 api_info
.async_handle
= NULL
;
1314 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
1317 if ((acc_dev
->capabilities
& GOMP_OFFLOAD_CAP_SHARED_MEM
)
1318 || (flags
& GOACC_FLAG_HOST_FALLBACK
))
1320 prof_info
.device_type
= acc_device_host
;
1321 api_info
.device_type
= prof_info
.device_type
;
1330 va_start (ap
, num_waits
);
1331 goacc_wait (async
, num_waits
, &ap
);
1335 goacc_aq aq
= get_goacc_asyncqueue (async
);
1338 goacc_enter_data_internal (acc_dev
, mapnum
, hostaddrs
, sizes
, kinds
, aq
);
1340 goacc_exit_data_internal (acc_dev
, mapnum
, hostaddrs
, sizes
, kinds
, aq
);
1345 prof_info
.event_type
1346 = data_enter
? acc_ev_enter_data_end
: acc_ev_exit_data_end
;
1347 enter_exit_data_event_info
.other_event
.event_type
= prof_info
.event_type
;
1348 goacc_profiling_dispatch (&prof_info
, &enter_exit_data_event_info
,
1351 thr
->prof_info
= NULL
;
1352 thr
->api_info
= NULL
;