/* OpenACC Runtime initialization routines
- Copyright (C) 2013-2015 Free Software Foundation, Inc.
+ Copyright (C) 2013-2020 Free Software Foundation, Inc.
Contributed by Mentor Embedded.
<http://www.gnu.org/licenses/>. */
#include "openacc.h"
-#include "config.h"
#include "libgomp.h"
#include "gomp-constants.h"
#include "oacc-int.h"
-#include "splay-tree.h"
-#include <stdint.h>
+#include <string.h>
#include <assert.h>
-/* Return block containing [H->S), or NULL if not contained. */
+/* Return block containing [H->S), or NULL if not contained. The device lock
+ for DEV must be locked on entry, and remains locked on exit. */
static splay_tree_key
lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
node.host_start = (uintptr_t) h;
node.host_end = (uintptr_t) h + s;
- gomp_mutex_lock (&dev->lock);
key = splay_tree_lookup (&dev->mem_map, &node);
- gomp_mutex_unlock (&dev->lock);
return key;
}
-/* Return block containing [D->S), or NULL if not contained.
- The list isn't ordered by device address, so we have to iterate
- over the whole array. This is not expected to be a common
- operation. */
+/* Helper for lookup_dev. Iterate over splay tree. */
static splay_tree_key
-lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
+lookup_dev_1 (splay_tree_node node, uintptr_t d, size_t s)
{
- int i;
- struct target_mem_desc *t;
+ splay_tree_key key = &node->key;
+ if (d >= key->tgt->tgt_start && d + s <= key->tgt->tgt_end)
+ return key;
- if (!tgt)
- return NULL;
-
- gomp_mutex_lock (&tgt->device_descr->lock);
-
- for (t = tgt; t != NULL; t = t->prev)
- {
- if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
- break;
- }
+ key = NULL;
+ if (node->left)
+ key = lookup_dev_1 (node->left, d, s);
+ if (!key && node->right)
+ key = lookup_dev_1 (node->right, d, s);
- gomp_mutex_unlock (&tgt->device_descr->lock);
+ return key;
+}
- if (!t)
- return NULL;
+/* Return block containing [D->S), or NULL if not contained.
- for (i = 0; i < t->list_count; i++)
- {
- void * offset;
+ This iterates over the splay tree. This is not expected to be a common
+ operation.
- splay_tree_key k = &t->array[i].key;
- offset = d - t->tgt_start + k->tgt_offset;
+ The device lock associated with MEM_MAP must be locked on entry, and remains
+ locked on exit. */
- if (k->host_start + offset <= (void *) k->host_end)
- return k;
- }
+static splay_tree_key
+lookup_dev (splay_tree mem_map, void *d, size_t s)
+{
+ if (!mem_map || !mem_map->root)
+ return NULL;
- return NULL;
+ return lookup_dev_1 (mem_map->root, (uintptr_t) d, s);
}
+
/* OpenACC is silent on how memory exhaustion is indicated. We return
NULL. */
assert (thr->dev);
- return thr->dev->alloc_func (thr->dev->target_id, s);
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return malloc (s);
+
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+
+ void *res = thr->dev->alloc_func (thr->dev->target_id, s);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+
+ return res;
}
-/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
- the device address is mapped. We choose to check if it mapped,
- and if it is, to unmap it. */
void
acc_free (void *d)
{
splay_tree_key k;
- struct goacc_thread *thr = goacc_thread ();
if (!d)
return;
+ struct goacc_thread *thr = goacc_thread ();
+
assert (thr && thr->dev);
+ struct gomp_device_descr *acc_dev = thr->dev;
+
+ if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return free (d);
+
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+
+ gomp_mutex_lock (&acc_dev->lock);
+
/* We don't have to call lazy open here, as the ptr value must have
been returned by acc_malloc. It's not permitted to pass NULL in
(unless you got that null from acc_malloc). */
- if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
- {
- void *offset;
-
- offset = d - k->tgt->tgt_start + k->tgt_offset;
+ if ((k = lookup_dev (&acc_dev->mem_map, d, 1)))
+ {
+ void *offset = d - k->tgt->tgt_start + k->tgt_offset;
+ void *h = k->host_start + offset;
+ size_t h_size = k->host_end - k->host_start;
+ gomp_mutex_unlock (&acc_dev->lock);
+ /* PR92503 "[OpenACC] Behavior of 'acc_free' if the memory space is still
+ used in a mapping". */
+ gomp_fatal ("refusing to free device memory space at %p that is still"
+ " mapped at [%p,+%d]",
+ d, h, (int) h_size);
+ }
+ else
+ gomp_mutex_unlock (&acc_dev->lock);
- acc_unmap_data ((void *)(k->host_start + offset));
- }
+ if (!acc_dev->free_func (acc_dev->target_id, d))
+ gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
- thr->dev->free_func (thr->dev->target_id, d);
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
-void
-acc_memcpy_to_device (void *d, void *h, size_t s)
+static void
+memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async,
+ const char *libfnname)
{
/* No need to call lazy open here, as the device pointer must have
been obtained from a routine that did that. */
assert (thr && thr->dev);
- thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ {
+ if (from)
+ memmove (h, d, s);
+ else
+ memmove (d, h, s);
+ return;
+ }
+
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ }
+
+ goacc_aq aq = get_goacc_asyncqueue (async);
+ if (from)
+ gomp_copy_dev2host (thr->dev, aq, h, d, s);
+ else
+ gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+}
+
+void
+acc_memcpy_to_device (void *d, void *h, size_t s)
+{
+ memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__);
}
void
-acc_memcpy_from_device (void *h, void *d, size_t s)
+acc_memcpy_to_device_async (void *d, void *h, size_t s, int async)
{
- /* No need to call lazy open here, as the device pointer must have
- been obtained from a routine that did that. */
- struct goacc_thread *thr = goacc_thread ();
+ memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__);
+}
- assert (thr && thr->dev);
+void
+acc_memcpy_from_device (void *h, void *d, size_t s)
+{
+ memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__);
+}
- thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
+void
+acc_memcpy_from_device_async (void *h, void *d, size_t s, int async)
+{
+ memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__);
}
/* Return the device pointer that corresponds to host data H. Or NULL
goacc_lazy_initialize ();
struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *dev = thr->dev;
- n = lookup_host (thr->dev, h, 1);
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return h;
+
+ /* In the following, no OpenACC Profiling Interface events can possibly be
+ generated. */
+
+ gomp_mutex_lock (&dev->lock);
+
+ n = lookup_host (dev, h, 1);
if (!n)
- return NULL;
+ {
+ gomp_mutex_unlock (&dev->lock);
+ return NULL;
+ }
offset = h - n->host_start;
d = n->tgt->tgt_start + n->tgt_offset + offset;
+ gomp_mutex_unlock (&dev->lock);
+
return d;
}
goacc_lazy_initialize ();
struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
+
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return d;
- n = lookup_dev (thr->dev->openacc.data_environ, d, 1);
+ /* In the following, no OpenACC Profiling Interface events can possibly be
+ generated. */
+
+ gomp_mutex_lock (&acc_dev->lock);
+
+ n = lookup_dev (&acc_dev->mem_map, d, 1);
if (!n)
- return NULL;
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ return NULL;
+ }
offset = d - n->tgt->tgt_start + n->tgt_offset;
h = n->host_start + offset;
+ gomp_mutex_unlock (&acc_dev->lock);
+
return h;
}
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return h != NULL;
+
+ /* In the following, no OpenACC Profiling Interface events can possibly be
+ generated. */
+
+ gomp_mutex_lock (&acc_dev->lock);
+
n = lookup_host (acc_dev, h, s);
if (n && ((uintptr_t)h < n->host_start
|| s > n->host_end - n->host_start))
n = NULL;
+ gomp_mutex_unlock (&acc_dev->lock);
+
return n != NULL;
}
void
acc_map_data (void *h, void *d, size_t s)
{
- struct target_mem_desc *tgt;
size_t mapnum = 1;
void *hostaddrs = h;
void *devaddrs = d;
{
if (d != h)
gomp_fatal ("cannot map data on shared-memory system");
-
- tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
}
else
{
gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
(void *)h, (int)s, (void *)d, (int)s);
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+
+ gomp_mutex_lock (&acc_dev->lock);
+
if (lookup_host (acc_dev, h, s))
- gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
- (int)s);
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
+ (int)s);
+ }
- if (lookup_dev (thr->dev->openacc.data_environ, d, s))
- gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
- (int)s);
+ if (lookup_dev (&thr->dev->mem_map, d, s))
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
+ (int)s);
+ }
- tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
- &kinds, true, false);
- }
+ gomp_mutex_unlock (&acc_dev->lock);
- tgt->prev = acc_dev->openacc.data_environ;
- acc_dev->openacc.data_environ = tgt;
+ struct target_mem_desc *tgt
+ = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
+ &kinds, true,
+ GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_ENTER_DATA);
+ assert (tgt);
+ assert (tgt->list_count == 1);
+ splay_tree_key n = tgt->list[0].key;
+ assert (n);
+ assert (n->refcount == 1);
+ assert (n->dynamic_refcount == 0);
+ /* Special reference counting behavior. */
+ n->refcount = REFCOUNT_INFINITY;
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+ }
}
void
/* No need to call lazy open, as the address must have been mapped. */
- size_t host_size;
+ /* This is a no-op on shared-memory targets. */
+ if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return;
+
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+
+ gomp_mutex_lock (&acc_dev->lock);
+
splay_tree_key n = lookup_host (acc_dev, h, 1);
- struct target_mem_desc *t;
if (!n)
- gomp_fatal ("%p is not a mapped block", (void *)h);
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("%p is not a mapped block", (void *)h);
+ }
- host_size = n->host_end - n->host_start;
+ size_t host_size = n->host_end - n->host_start;
if (n->host_start != (uintptr_t) h)
- gomp_fatal ("[%p,%d] surrounds1 %p",
- (void *) n->host_start, (int) host_size, (void *) h);
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("[%p,%d] surrounds %p",
+ (void *) n->host_start, (int) host_size, (void *) h);
+ }
+ /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
+ 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating
+ the different 'REFCOUNT_INFINITY' cases, or simply separate
+ 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
+ etc.)? */
+ else if (n->refcount != REFCOUNT_INFINITY)
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("refusing to unmap block [%p,+%d] that has not been mapped"
+ " by 'acc_map_data'",
+ (void *) h, (int) host_size);
+ }
- t = n->tgt;
+ struct target_mem_desc *tgt = n->tgt;
- if (t->refcount == 2)
+ if (tgt->refcount == REFCOUNT_INFINITY)
{
- struct target_mem_desc *tp;
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("cannot unmap target block");
+ }
- /* This is the last reference, so pull the descriptor off the
- chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
- freeing the device memory. */
- t->tgt_end = 0;
- t->to_free = 0;
+ /* Above, we've verified that the mapping must have been set up by
+ 'acc_map_data'. */
+ assert (tgt->refcount == 1);
- gomp_mutex_lock (&acc_dev->lock);
+ /* Nullifying these fields prevents 'gomp_unmap_tgt' via 'gomp_remove_var'
+ from freeing the target memory. */
+ tgt->tgt_end = 0;
+ tgt->to_free = NULL;
- for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
- tp = t, t = t->prev)
- if (n->tgt == t)
- {
- if (tp)
- tp->prev = t->prev;
- else
- acc_dev->openacc.data_environ = t->prev;
+ bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
+ assert (is_tgt_unmapped);
- break;
- }
+ gomp_mutex_unlock (&acc_dev->lock);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+}
+
+/* Helper function to map a single dynamic data item, represented by a single
+ mapping. The acc_dev->lock should be held on entry, and remains locked on
+ exit. */
+
+static void *
+goacc_map_var_existing (struct gomp_device_descr *acc_dev, void *hostaddr,
+ size_t size, splay_tree_key n)
+{
+ assert (n);
+
+ /* Present. */
+ void *d = (void *) (n->tgt->tgt_start + n->tgt_offset + hostaddr
+ - n->host_start);
+
+ if (hostaddr + size > (void *) n->host_end)
+ {
gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("[%p,+%d] not mapped", hostaddr, (int) size);
}
- gomp_unmap_vars (t, true);
+ assert (n->refcount != REFCOUNT_LINK);
+ if (n->refcount != REFCOUNT_INFINITY)
+ n->refcount++;
+ n->dynamic_refcount++;
+
+ return d;
}
-#define FLAG_PRESENT (1 << 0)
-#define FLAG_CREATE (1 << 1)
-#define FLAG_COPY (1 << 2)
+/* Enter dynamic mapping for a single datum. Return the device pointer. */
static void *
-present_create_copy (unsigned f, void *h, size_t s)
+goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
{
void *d;
splay_tree_key n;
- if (!h || !s)
- gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);
+ if (!hostaddrs[0] || !sizes[0])
+ gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);
goacc_lazy_initialize ();
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
- n = lookup_host (acc_dev, h, s);
- if (n)
- {
- /* Present. */
- d = (void *) (n->tgt->tgt_start + n->tgt_offset);
+ if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return hostaddrs[0];
- if (!(f & FLAG_PRESENT))
- gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
- (void *)h, (int)s, (void *)d, (int)s);
- if ((h + s) > (void *)n->host_end)
- gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
}
- else if (!(f & FLAG_CREATE))
+
+ gomp_mutex_lock (&acc_dev->lock);
+
+ n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
+ if (n)
{
- gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
+ d = goacc_map_var_existing (acc_dev, hostaddrs[0], sizes[0], n);
+ gomp_mutex_unlock (&acc_dev->lock);
}
else
{
- struct target_mem_desc *tgt;
- size_t mapnum = 1;
- unsigned short kinds;
- void *hostaddrs = h;
-
- if (f & FLAG_COPY)
- kinds = GOMP_MAP_TO;
- else
- kinds = GOMP_MAP_ALLOC;
+ const size_t mapnum = 1;
- tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
- false);
-
- gomp_mutex_lock (&acc_dev->lock);
+ gomp_mutex_unlock (&acc_dev->lock);
- d = tgt->to_free;
- tgt->prev = acc_dev->openacc.data_environ;
- acc_dev->openacc.data_environ = tgt;
+ goacc_aq aq = get_goacc_asyncqueue (async);
+
+ struct target_mem_desc *tgt
+ = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
+ kinds, true, (GOMP_MAP_VARS_OPENACC
+ | GOMP_MAP_VARS_ENTER_DATA));
+ assert (tgt);
+ assert (tgt->list_count == 1);
+ n = tgt->list[0].key;
+ assert (n);
+ assert (n->refcount == 1);
+ assert (n->dynamic_refcount == 0);
+ n->dynamic_refcount++;
+
+ d = (void *) tgt->tgt_start;
+ }
- gomp_mutex_unlock (&acc_dev->lock);
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
}
return d;
void *
acc_create (void *h, size_t s)
{
- return present_create_copy (FLAG_CREATE, h, s);
+ unsigned short kinds[1] = { GOMP_MAP_ALLOC };
+ return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}
-void *
-acc_copyin (void *h, size_t s)
+void
+acc_create_async (void *h, size_t s, int async)
{
- return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
+ unsigned short kinds[1] = { GOMP_MAP_ALLOC };
+ goacc_enter_datum (&h, &s, &kinds, async);
}
+/* acc_present_or_create used to be what acc_create is now. */
+/* acc_pcreate is acc_present_or_create by a different name. */
+#ifdef HAVE_ATTRIBUTE_ALIAS
+strong_alias (acc_create, acc_present_or_create)
+strong_alias (acc_create, acc_pcreate)
+#else
void *
acc_present_or_create (void *h, size_t s)
{
- return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
+ return acc_create (h, s);
+}
+
+void *
+acc_pcreate (void *h, size_t s)
+{
+ return acc_create (h, s);
}
+#endif
+void *
+acc_copyin (void *h, size_t s)
+{
+ unsigned short kinds[1] = { GOMP_MAP_TO };
+ return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
+}
+
+void
+acc_copyin_async (void *h, size_t s, int async)
+{
+ unsigned short kinds[1] = { GOMP_MAP_TO };
+ goacc_enter_datum (&h, &s, &kinds, async);
+}
+
+/* acc_present_or_copyin used to be what acc_copyin is now. */
+/* acc_pcopyin is acc_present_or_copyin by a different name. */
+#ifdef HAVE_ATTRIBUTE_ALIAS
+strong_alias (acc_copyin, acc_present_or_copyin)
+strong_alias (acc_copyin, acc_pcopyin)
+#else
void *
acc_present_or_copyin (void *h, size_t s)
{
- return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
+ return acc_copyin (h, s);
}
-#define FLAG_COPYOUT (1 << 0)
+void *
+acc_pcopyin (void *h, size_t s)
+{
+ return acc_copyin (h, s);
+}
+#endif
+
+
+/* Helper function to unmap a single data item. Device lock should be held on
+ entry, and remains locked on exit. */
static void
-delete_copyout (unsigned f, void *h, size_t s)
+goacc_exit_datum_1 (struct gomp_device_descr *acc_dev, void *h, size_t s,
+ unsigned short kind, splay_tree_key n, goacc_aq aq)
{
- size_t host_size;
- splay_tree_key n;
- void *d;
- struct goacc_thread *thr = goacc_thread ();
- struct gomp_device_descr *acc_dev = thr->dev;
+ assert (kind != GOMP_MAP_DETACH
+ && kind != GOMP_MAP_FORCE_DETACH);
- n = lookup_host (acc_dev, h, s);
+ if ((uintptr_t) h < n->host_start || (uintptr_t) h + s > n->host_end)
+ {
+ size_t host_size = n->host_end - n->host_start;
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("[%p,+%d] outside mapped block [%p,+%d]",
+ (void *) h, (int) s, (void *) n->host_start, (int) host_size);
+ }
+
+ bool finalize = (kind == GOMP_MAP_FORCE_FROM
+ || kind == GOMP_MAP_DELETE);
+
+ assert (n->refcount != REFCOUNT_LINK);
+ if (n->refcount != REFCOUNT_INFINITY
+ && n->refcount < n->dynamic_refcount)
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("Dynamic reference counting assert fail\n");
+ }
+
+ if (finalize)
+ {
+ if (n->refcount != REFCOUNT_INFINITY)
+ n->refcount -= n->dynamic_refcount;
+ n->dynamic_refcount = 0;
+ }
+ else if (n->dynamic_refcount)
+ {
+ if (n->refcount != REFCOUNT_INFINITY)
+ n->refcount--;
+ n->dynamic_refcount--;
+ }
+
+ if (n->refcount == 0)
+ {
+ bool copyout = (kind == GOMP_MAP_FROM
+ || kind == GOMP_MAP_FORCE_FROM);
+ if (copyout)
+ {
+ void *d = (void *) (n->tgt->tgt_start + n->tgt_offset
+ + (uintptr_t) h - n->host_start);
+ gomp_copy_dev2host (acc_dev, aq, h, d, s);
+ }
+ if (aq)
+ /* TODO We can't do the 'is_tgt_unmapped' checking -- see the
+ 'gomp_unref_tgt' comment in
+ <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
+ PR92881. */
+ gomp_remove_var_async (acc_dev, n, aq);
+ else
+ {
+ size_t num_mappings = 0;
+ /* If the target_mem_desc represents a single data mapping, we can
+ check that it is freed when this splay tree key's refcount reaches
+ zero. Otherwise (e.g. for a 'GOMP_MAP_STRUCT' mapping with
+ multiple members), fall back to skipping the test. */
+ for (size_t l_i = 0; l_i < n->tgt->list_count; ++l_i)
+ if (n->tgt->list[l_i].key
+ && !n->tgt->list[l_i].is_attach)
+ ++num_mappings;
+ bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
+ assert (is_tgt_unmapped || num_mappings > 1);
+ }
+ }
+}
+
+
+/* Exit a dynamic mapping for a single variable. */
+
+static void
+goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
+{
/* No need to call lazy open, as the data must already have been
mapped. */
- if (!n)
- gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
+ kind &= 0xff;
- d = (void *) (n->tgt->tgt_start + n->tgt_offset);
+ struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
- host_size = n->host_end - n->host_start;
+ if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return;
+
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ }
- if (n->host_start != (uintptr_t) h || host_size != s)
- gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
- (void *) n->host_start, (int) host_size, (void *) h, (int) s);
+ gomp_mutex_lock (&acc_dev->lock);
- if (f & FLAG_COPYOUT)
- acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+ splay_tree_key n = lookup_host (acc_dev, h, s);
+  /* Non-present data is a no-op: PR92726, PR92970, PR92984.  */
+ if (n)
+ {
+ goacc_aq aq = get_goacc_asyncqueue (async);
+ goacc_exit_datum_1 (acc_dev, h, s, kind, n, aq);
+ }
- acc_unmap_data (h);
+ gomp_mutex_unlock (&acc_dev->lock);
- acc_dev->free_func (acc_dev->target_id, d);
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
void
acc_delete (void *h , size_t s)
{
- delete_copyout (0, h, s);
+ goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync);
+}
+
+void
+acc_delete_async (void *h , size_t s, int async)
+{
+ goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async);
+}
+
+void
+acc_delete_finalize (void *h , size_t s)
+{
+ goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync);
+}
+
+void
+acc_delete_finalize_async (void *h , size_t s, int async)
+{
+ goacc_exit_datum (h, s, GOMP_MAP_DELETE, async);
}
-void acc_copyout (void *h, size_t s)
+void
+acc_copyout (void *h, size_t s)
+{
+ goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync);
+}
+
+void
+acc_copyout_async (void *h, size_t s, int async)
+{
+ goacc_exit_datum (h, s, GOMP_MAP_FROM, async);
+}
+
+void
+acc_copyout_finalize (void *h, size_t s)
{
- delete_copyout (FLAG_COPYOUT, h, s);
+ goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
+}
+
+void
+acc_copyout_finalize_async (void *h, size_t s, int async)
+{
+ goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async);
}
static void
-update_dev_host (int is_dev, void *h, size_t s)
+update_dev_host (int is_dev, void *h, size_t s, int async)
{
splay_tree_key n;
void *d;
+
+ goacc_lazy_initialize ();
+
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
- n = lookup_host (acc_dev, h, s);
+ if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return;
- /* No need to call lazy open, as the data must already have been
- mapped. */
+ /* Fortran optional arguments that are non-present result in a
+ NULL host address here. This can safely be ignored as it is
+ not possible to 'update' a non-present optional argument. */
+ if (h == NULL)
+ return;
+
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ }
+
+ gomp_mutex_lock (&acc_dev->lock);
+
+ n = lookup_host (acc_dev, h, s);
if (!n)
- gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
+ }
+
+ d = (void *) (n->tgt->tgt_start + n->tgt_offset
+ + (uintptr_t) h - n->host_start);
- d = (void *) (n->tgt->tgt_start + n->tgt_offset);
+ goacc_aq aq = get_goacc_asyncqueue (async);
if (is_dev)
- acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
+ gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
else
- acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+ gomp_copy_dev2host (acc_dev, aq, h, d, s);
+
+ gomp_mutex_unlock (&acc_dev->lock);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
void
acc_update_device (void *h, size_t s)
{
- update_dev_host (1, h, s);
+ update_dev_host (1, h, s, acc_async_sync);
+}
+
+void
+acc_update_device_async (void *h, size_t s, int async)
+{
+ update_dev_host (1, h, s, async);
}
void
acc_update_self (void *h, size_t s)
{
- update_dev_host (0, h, s);
+ update_dev_host (0, h, s, acc_async_sync);
}
void
-gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
- void *kinds)
+acc_update_self_async (void *h, size_t s, int async)
+{
+ update_dev_host (0, h, s, async);
+}
+
+void
+acc_attach_async (void **hostaddr, int async)
{
- struct target_mem_desc *tgt;
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
+ goacc_aq aq = get_goacc_asyncqueue (async);
+
+ struct splay_tree_key_s cur_node;
+ splay_tree_key n;
+
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return;
+
+ gomp_mutex_lock (&acc_dev->lock);
+
+ cur_node.host_start = (uintptr_t) hostaddr;
+ cur_node.host_end = cur_node.host_start + sizeof (void *);
+ n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("struct not mapped for acc_attach");
+ }
+
+ gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n, (uintptr_t) hostaddr,
+ 0, NULL);
- gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
- tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
- NULL, sizes, kinds, true, false);
- gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
- tgt->prev = acc_dev->openacc.data_environ;
- acc_dev->openacc.data_environ = tgt;
+ gomp_mutex_unlock (&acc_dev->lock);
}
void
-gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
+acc_attach (void **hostaddr)
+{
+ acc_attach_async (hostaddr, acc_async_sync);
+}
+
+/* Shared worker for the acc_detach* family: detach the device pointer for
+ the pointer at *HOSTADDR (via gomp_detach_pointer) on async queue ASYNC.
+ FINALIZE is forwarded to gomp_detach_pointer for the 'finalize'
+ variants. */
+static void
+goacc_detach_internal (void **hostaddr, int async, bool finalize)
{
 struct goacc_thread *thr = goacc_thread ();
 struct gomp_device_descr *acc_dev = thr->dev;
+ struct splay_tree_key_s cur_node;
 splay_tree_key n;
- struct target_mem_desc *t;
- int minrefs = (mapnum == 1) ? 2 : 3;
+ struct goacc_asyncqueue *aq = get_goacc_asyncqueue (async);
- n = lookup_host (acc_dev, h, 1);
+ /* Detachment is a no-op when host and device share memory. */
+ if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ return;
- if (!n)
- gomp_fatal ("%p is not a mapped block", (void *)h);
+ gomp_mutex_lock (&acc_dev->lock);
- gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
+ /* Look up the mapping that contains the host pointer itself. */
+ cur_node.host_start = (uintptr_t) hostaddr;
+ cur_node.host_end = cur_node.host_start + sizeof (void *);
+ n = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
- t = n->tgt;
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("struct not mapped for acc_detach");
+ }
+
+ gomp_detach_pointer (acc_dev, aq, n, (uintptr_t) hostaddr, finalize, NULL);
- struct target_mem_desc *tp;
+ gomp_mutex_unlock (&acc_dev->lock);
+}
+
+/* OpenACC acc_detach and its async/finalize variants; all delegate to
+ goacc_detach_internal. */
void
+acc_detach (void **hostaddr)
+{
+ goacc_detach_internal (hostaddr, acc_async_sync, false);
+}
+
+void
+acc_detach_async (void **hostaddr, int async)
+{
+ goacc_detach_internal (hostaddr, async, false);
+}
+
+void
+acc_detach_finalize (void **hostaddr)
+{
+ goacc_detach_internal (hostaddr, acc_async_sync, true);
+}
+
+void
+acc_detach_finalize_async (void **hostaddr, int async)
+{
+ goacc_detach_internal (hostaddr, async, true);
+}
+
+/* Some types of (pointer) variables use several consecutive mappings, which
+ must be treated as a group for enter/exit data directives. This function
+ returns the last mapping in such a group (inclusive), or POS for singleton
+ mappings. */
+
+static int
+find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds)
+{
+ unsigned char kind0 = kinds[pos] & 0xff;
+ int first_pos = pos;
+
+ switch (kind0)
+ {
+ case GOMP_MAP_TO_PSET:
+ if (pos + 1 < mapnum
+ && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
+ return pos + 1;
+
+ while (pos + 1 < mapnum
+ && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
+ pos++;
+ /* We expect at least one GOMP_MAP_POINTER (if not a single
+ GOMP_MAP_ATTACH) after a GOMP_MAP_TO_PSET. */
+ assert (pos > first_pos);
+ break;
+
+ case GOMP_MAP_STRUCT:
+ /* For a struct mapping, sizes[pos] is the number of member mappings
+ that immediately follow. */
+ pos += sizes[pos];
+ break;
+
+ case GOMP_MAP_POINTER:
+ case GOMP_MAP_ALWAYS_POINTER:
+ /* These mappings are only expected after some other mapping. If we
+ see one by itself, something has gone wrong. */
+ gomp_fatal ("unexpected mapping");
+ break;
+
+ case GOMP_MAP_ATTACH:
+ /* A lone GOMP_MAP_ATTACH forms a singleton group. */
+ break;
+
+ default:
+ /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
+ mapping. */
+ if (pos + 1 < mapnum)
+ {
+ unsigned char kind1 = kinds[pos + 1] & 0xff;
+ if (kind1 == GOMP_MAP_ALWAYS_POINTER)
+ return pos + 1;
+ }
+
+ /* We can have a single GOMP_MAP_ATTACH mapping after a to/from
+ mapping. */
+ if (pos + 1 < mapnum
+ && (kinds[pos + 1] & 0xff) == GOMP_MAP_ATTACH)
+ return pos + 1;
+
+ /* We can have zero or more GOMP_MAP_POINTER mappings after a to/from
+ (etc.) mapping. */
+ while (pos + 1 < mapnum
+ && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
+ pos++;
+ }
+
+ return pos;
+}
+
+/* Map variables for OpenACC "enter data". We can't just call
+ gomp_map_vars_async once, because individual mapped variables might have
+ "exit data" called for them at different times. */
+
+static void
+goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
+ void **hostaddrs, size_t *sizes,
+ unsigned short *kinds, goacc_aq aq)
+{
 gomp_mutex_lock (&acc_dev->lock);
- if (t->refcount == minrefs)
+ /* Process the mappings group by group (see find_group_last). */
+ for (size_t i = 0; i < mapnum; i++)
 {
- /* This is the last reference, so pull the descriptor off the
- chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
- freeing the device memory. */
- t->tgt_end = 0;
- t->to_free = 0;
+ splay_tree_key n;
+ size_t group_last = find_group_last (i, mapnum, sizes, kinds);
+ bool struct_p = false;
+ size_t size, groupnum = (group_last - i) + 1;
+
+ switch (kinds[i] & 0xff)
+ {
+ case GOMP_MAP_STRUCT:
+ {
+ size = (uintptr_t) hostaddrs[group_last] + sizes[group_last]
+ - (uintptr_t) hostaddrs[i];
+ struct_p = true;
+ }
+ break;
+
+ case GOMP_MAP_ATTACH:
+ size = sizeof (void *);
+ break;
+
+ default:
+ size = sizes[i];
+ }
+
+ n = lookup_host (acc_dev, hostaddrs[i], size);
+
+ /* Struct already mapped: every member must belong to that same
+ existing mapping before we adjust reference counts. */
+ if (n && struct_p)
+ {
+ for (size_t j = i + 1; j <= group_last; j++)
+ {
+ struct splay_tree_key_s cur_node;
+ cur_node.host_start = (uintptr_t) hostaddrs[j];
+ cur_node.host_end = cur_node.host_start + sizes[j];
+ splay_tree_key n2
+ = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+ if (!n2
+ || n2->tgt != n->tgt
+ || n2->host_start - n->host_start
+ != n2->tgt_offset - n->tgt_offset)
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("Trying to map into device [%p..%p) structure "
+ "element when other mapped elements from the "
+ "same structure weren't mapped together with "
+ "it", (void *) cur_node.host_start,
+ (void *) cur_node.host_end);
+ }
+ }
+ /* This is a special case because we must increment the refcount by
+ the number of mapped struct elements, rather than by one. */
+ if (n->refcount != REFCOUNT_INFINITY)
+ n->refcount += groupnum - 1;
+ n->dynamic_refcount += groupnum - 1;
+ }
+ /* A singleton group whose datum is already mapped. */
+ else if (n && groupnum == 1)
+ {
+ void *h = hostaddrs[i];
+ size_t s = sizes[i];
- for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
- tp = t, t = t->prev)
+ if ((kinds[i] & 0xff) == GOMP_MAP_ATTACH)
+ {
+ gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, n,
+ (uintptr_t) h, s, NULL);
+ /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
+ reference counts ('n->refcount', 'n->dynamic_refcount'). */
+ }
+ else
+ goacc_map_var_existing (acc_dev, h, s, n);
+ }
+ /* A multi-mapping group whose lead datum is already mapped. */
+ else if (n && groupnum > 1)
 {
- if (n->tgt == t)
+ assert (n->refcount != REFCOUNT_INFINITY
+ && n->refcount != REFCOUNT_LINK);
+
+ for (size_t j = i + 1; j <= group_last; j++)
+ if ((kinds[j] & 0xff) == GOMP_MAP_ATTACH)
+ {
+ splay_tree_key m
+ = lookup_host (acc_dev, hostaddrs[j], sizeof (void *));
+ gomp_attach_pointer (acc_dev, aq, &acc_dev->mem_map, m,
+ (uintptr_t) hostaddrs[j], sizes[j], NULL);
+ }
+
+ bool processed = false;
+
+ struct target_mem_desc *tgt = n->tgt;
+ for (size_t j = 0; j < tgt->list_count; j++)
+ if (tgt->list[j].key == n)
+ {
+ /* We are processing a group of mappings (e.g.
+ [GOMP_MAP_TO, GOMP_MAP_TO_PSET, GOMP_MAP_POINTER]).
+ Find the right group in the target_mem_desc's variable
+ list, and increment the refcounts for each item in that
+ group. */
+ for (size_t k = 0; k < groupnum; k++)
+ if (j + k < tgt->list_count
+ && tgt->list[j + k].key
+ && !tgt->list[j + k].is_attach)
+ {
+ tgt->list[j + k].key->refcount++;
+ tgt->list[j + k].key->dynamic_refcount++;
+ }
+ processed = true;
+ break;
+ }
+
+ if (!processed)
 {
- if (tp)
- tp->prev = t->prev;
- else
- acc_dev->openacc.data_environ = t->prev;
- break;
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("dynamic refcount incrementing failed for "
+ "pointer/pset");
 }
 }
+ else if (hostaddrs[i])
+ {
+ /* The data is not mapped already. Map it now, unless the first
+ member in the group has a NULL pointer (e.g. a non-present
+ optional parameter). */
+ gomp_mutex_unlock (&acc_dev->lock);
+
+ struct target_mem_desc *tgt
+ = gomp_map_vars_async (acc_dev, aq, groupnum, &hostaddrs[i], NULL,
+ &sizes[i], &kinds[i], true,
+ (GOMP_MAP_VARS_OPENACC
+ | GOMP_MAP_VARS_ENTER_DATA));
+ assert (tgt);
+
+ gomp_mutex_lock (&acc_dev->lock);
+
+ for (size_t j = 0; j < tgt->list_count; j++)
+ {
+ n = tgt->list[j].key;
+ if (n && !tgt->list[j].is_attach)
+ n->dynamic_refcount++;
+ }
+ }
+
+ /* Skip over the rest of the group. */
+ i = group_last;
 }
- if (force_copyfrom)
- t->list[0]->copy_from = 1;
+ gomp_mutex_unlock (&acc_dev->lock);
+}
+
+/* Unmap variables for OpenACC "exit data". */
+
+static void
+goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
+ void **hostaddrs, size_t *sizes,
+ unsigned short *kinds, goacc_aq aq)
+{
+ gomp_mutex_lock (&acc_dev->lock);
+
+ /* Handle "detach" before copyback/deletion of mapped data. */
+ for (size_t i = 0; i < mapnum; ++i)
+ {
+ unsigned char kind = kinds[i] & 0xff;
+ bool finalize = false;
+ switch (kind)
+ {
+ case GOMP_MAP_FORCE_DETACH:
+ finalize = true;
+ /* Fallthrough. */
+
+ case GOMP_MAP_DETACH:
+ {
+ struct splay_tree_key_s cur_node;
+ uintptr_t hostaddr = (uintptr_t) hostaddrs[i];
+ cur_node.host_start = hostaddr;
+ cur_node.host_end = cur_node.host_start + sizeof (void *);
+ splay_tree_key n
+ = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+ if (n == NULL)
+ {
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("struct not mapped for detach operation");
+ }
+
+ gomp_detach_pointer (acc_dev, aq, n, hostaddr, finalize, NULL);
+ }
+ break;
+ default:
+ ;
+ }
+ }
+
+ /* Second pass: copyback/deletion of the data mappings themselves. */
+ for (size_t i = 0; i < mapnum; ++i)
+ {
+ unsigned char kind = kinds[i] & 0xff;
+
+ switch (kind)
+ {
+ case GOMP_MAP_FROM:
+ case GOMP_MAP_FORCE_FROM:
+ case GOMP_MAP_TO_PSET:
+ case GOMP_MAP_POINTER:
+ case GOMP_MAP_DELETE:
+ case GOMP_MAP_RELEASE:
+ {
+ struct splay_tree_key_s cur_node;
+ size_t size;
+ if (kind == GOMP_MAP_POINTER)
+ size = sizeof (void *);
+ else
+ size = sizes[i];
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = cur_node.host_start + size;
+ splay_tree_key n
+ = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+ /* Not present: nothing to copy back or delete. */
+ if (n == NULL)
+ continue;
+
+ goacc_exit_datum_1 (acc_dev, hostaddrs[i], size, kind, n, aq);
+ }
+ break;
+
+ case GOMP_MAP_STRUCT:
+ /* Skip the 'GOMP_MAP_STRUCT' itself, and use the regular processing
+ for all its entries. This special handling exists for GCC 10.1
+ compatibility; afterwards, we're not generating these no-op
+ 'GOMP_MAP_STRUCT's anymore. */
+ break;
+
+ case GOMP_MAP_DETACH:
+ case GOMP_MAP_FORCE_DETACH:
+ /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic
+ reference counts ('n->refcount', 'n->dynamic_refcount'). */
+ break;
+
+ default:
+ gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
+ kind);
+ }
+ }
 gomp_mutex_unlock (&acc_dev->lock);
+}
- /* If running synchronously, unmap immediately. */
- if (async < acc_async_noval)
- gomp_unmap_vars (t, true);
- else
+/* Compiler entry point for OpenACC 'enter data' / 'exit data' directives.
+ Whether this is an enter or an exit is inferred from the mapping KINDS
+ (see the loop below); profiling events are dispatched around the actual
+ work, which is delegated to goacc_enter_data_internal or
+ goacc_exit_data_internal. */
+void
+GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs,
+ size_t *sizes, unsigned short *kinds, int async,
+ int num_waits, ...)
+{
+ int flags = GOACC_FLAGS_UNMARSHAL (flags_m);
+
+ struct goacc_thread *thr;
+ struct gomp_device_descr *acc_dev;
+ bool data_enter = false;
+ size_t i;
+
+ goacc_lazy_initialize ();
+
+ thr = goacc_thread ();
+ acc_dev = thr->dev;
+
+ /* Determine if this is an "acc enter data". */
+ for (i = 0; i < mapnum; ++i)
 {
- gomp_copy_from_async (t);
- acc_dev->openacc.register_async_cleanup_func (t);
+ unsigned char kind = kinds[i] & 0xff;
+
+ if (kind == GOMP_MAP_POINTER
+ || kind == GOMP_MAP_TO_PSET
+ || kind == GOMP_MAP_STRUCT)
+ continue;
+
+ if (kind == GOMP_MAP_FORCE_ALLOC
+ || kind == GOMP_MAP_FORCE_PRESENT
+ || kind == GOMP_MAP_ATTACH
+ || kind == GOMP_MAP_FORCE_TO
+ || kind == GOMP_MAP_TO
+ || kind == GOMP_MAP_ALLOC)
+ {
+ data_enter = true;
+ break;
+ }
+
+ if (kind == GOMP_MAP_RELEASE
+ || kind == GOMP_MAP_DELETE
+ || kind == GOMP_MAP_DETACH
+ || kind == GOMP_MAP_FORCE_DETACH
+ || kind == GOMP_MAP_FROM
+ || kind == GOMP_MAP_FORCE_FROM)
+ break;
+
+ gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
+ kind);
+ }
+
+ bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
+
+ acc_prof_info prof_info;
+ if (profiling_p)
+ {
+ thr->prof_info = &prof_info;
+
+ prof_info.event_type
+ = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
+ prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+ prof_info.version = _ACC_PROF_INFO_VERSION;
+ prof_info.device_type = acc_device_type (acc_dev->type);
+ prof_info.device_number = acc_dev->target_id;
+ prof_info.thread_id = -1;
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ prof_info.src_file = NULL;
+ prof_info.func_name = NULL;
+ prof_info.line_no = -1;
+ prof_info.end_line_no = -1;
+ prof_info.func_line_no = -1;
+ prof_info.func_end_line_no = -1;
+ }
+ acc_event_info enter_exit_data_event_info;
+ if (profiling_p)
+ {
+ enter_exit_data_event_info.other_event.event_type
+ = prof_info.event_type;
+ enter_exit_data_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ enter_exit_data_event_info.other_event.parent_construct
+ = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
+ enter_exit_data_event_info.other_event.implicit = 0;
+ enter_exit_data_event_info.other_event.tool_info = NULL;
+ }
+ acc_api_info api_info;
+ if (profiling_p)
+ {
+ thr->api_info = &api_info;
+
+ api_info.device_api = acc_device_api_none;
+ api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+ api_info.device_type = prof_info.device_type;
+ api_info.vendor = -1;
+ api_info.device_handle = NULL;
+ api_info.context_handle = NULL;
+ api_info.async_handle = NULL;
+ }
+
+ if (profiling_p)
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+
+ /* No mapping work to do for shared-memory devices or host fallback;
+ still emit the profiling end event. */
+ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ || (flags & GOACC_FLAG_HOST_FALLBACK))
+ {
+ prof_info.device_type = acc_device_host;
+ api_info.device_type = prof_info.device_type;
+
+ goto out_prof;
 }
- gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
+ if (num_waits)
+ {
+ va_list ap;
+
+ va_start (ap, num_waits);
+ goacc_wait (async, num_waits, &ap);
+ va_end (ap);
+ }
+
+ goacc_aq aq = get_goacc_asyncqueue (async);
+
+ if (data_enter)
+ goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
+ else
+ goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
+
+ out_prof:
+ if (profiling_p)
+ {
+ prof_info.event_type
+ = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
+ enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}