assert (tgt);
splay_tree_key n = tgt->list[0].key;
assert (n->refcount == 1);
- assert (n->dynamic_refcount == 0);
+ assert (n->virtual_refcount == 0);
/* Special reference counting behavior. */
n->refcount = REFCOUNT_INFINITY;
acc_api_info api_info;
bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
- size_t host_size;
-
gomp_mutex_lock (&acc_dev->lock);
splay_tree_key n = lookup_host (acc_dev, h, 1);
- struct target_mem_desc *t;
if (!n)
{
gomp_fatal ("%p is not a mapped block", (void *)h);
}
- host_size = n->host_end - n->host_start;
+ size_t host_size = n->host_end - n->host_start;
if (n->host_start != (uintptr_t) h)
{
(void *) n->host_start, (int) host_size, (void *) h);
}
/* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from
- 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating
+ 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating
the different 'REFCOUNT_INFINITY' cases, or simply separate
'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA'
etc.)? */
(void *) h, (int) host_size);
}
- t = n->tgt;
+ splay_tree_remove (&acc_dev->mem_map, n);
+
+ struct target_mem_desc *tgt = n->tgt;
- if (t->refcount == 1)
+ if (tgt->refcount == REFCOUNT_INFINITY)
{
- /* This is the last reference, so pull the descriptor off the
- chain. This prevents 'gomp_unmap_tgt' via 'gomp_remove_var' from
- freeing the device memory. */
- t->tgt_end = 0;
- t->to_free = 0;
+ gomp_mutex_unlock (&acc_dev->lock);
+ gomp_fatal ("cannot unmap target block");
+ }
+ else if (tgt->refcount > 1)
+ tgt->refcount--;
+ else
+ {
+ free (tgt->array);
+ free (tgt);
}
-
- bool is_tgt_unmapped = gomp_remove_var (acc_dev, n);
- assert (is_tgt_unmapped);
gomp_mutex_unlock (&acc_dev->lock);
}
-/* Enter dynamic mappings.
-
- The handling for MAPNUM bigger than one is special handling for
- 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'. For these, only the first mapping
- is considered in reference counting; the following ones implicitly follow
- suit.
-
- If there's just one mapping, return the device pointer. */
+/* Enter dynamic mapping for a single datum. Return the device pointer. */
static void *
-goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds,
- int async)
+goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async)
{
void *d;
splay_tree_key n;
- assert (mapnum > 0);
- if (mapnum == 1
- && (!hostaddrs[0] || !sizes[0]))
+ if (!hostaddrs[0] || !sizes[0])
gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]);
- else if (mapnum > 1
- && !hostaddrs[0])
- return /* n/a */ (void *) -1;
goacc_lazy_initialize ();
struct gomp_device_descr *acc_dev = thr->dev;
if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
- {
- if (mapnum == 1)
- return hostaddrs[0];
- else
- return /* n/a */ (void *) -1;
- }
+ return hostaddrs[0];
acc_prof_info prof_info;
acc_api_info api_info;
gomp_mutex_lock (&acc_dev->lock);
n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
- if (n && mapnum == 1)
+ if (n)
{
void *h = hostaddrs[0];
size_t s = sizes[0];
assert (n->refcount != REFCOUNT_LINK);
if (n->refcount != REFCOUNT_INFINITY)
- n->refcount++;
- n->dynamic_refcount++;
-
- gomp_mutex_unlock (&acc_dev->lock);
- }
- else if (n && mapnum > 1)
- {
- d = /* n/a */ (void *) -1;
-
- assert (n->refcount != REFCOUNT_INFINITY
- && n->refcount != REFCOUNT_LINK);
-
- bool processed = false;
-
- struct target_mem_desc *tgt = n->tgt;
- for (size_t i = 0; i < tgt->list_count; i++)
- if (tgt->list[i].key == n)
- {
- for (size_t j = 0; j < mapnum; j++)
- if (i + j < tgt->list_count && tgt->list[i + j].key)
- {
- tgt->list[i + j].key->refcount++;
- tgt->list[i + j].key->dynamic_refcount++;
- }
- processed = true;
- }
+ {
+ n->refcount++;
+ n->virtual_refcount++;
+ }
gomp_mutex_unlock (&acc_dev->lock);
- if (!processed)
- gomp_fatal ("dynamic refcount incrementing failed for pointer/pset");
}
else
{
+ const size_t mapnum = 1;
+
gomp_mutex_unlock (&acc_dev->lock);
goacc_aq aq = get_goacc_asyncqueue (async);
- struct target_mem_desc *tgt
- = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes,
- kinds, true, GOMP_MAP_VARS_ENTER_DATA);
- assert (tgt);
- n = tgt->list[0].key;
- assert (n->refcount == 1);
- assert (n->dynamic_refcount == 0);
- n->dynamic_refcount++;
+ gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
+ true, GOMP_MAP_VARS_OPENACC_ENTER_DATA);
- d = tgt->to_free;
+ gomp_mutex_lock (&acc_dev->lock);
+ n = lookup_host (acc_dev, hostaddrs[0], sizes[0]);
+ assert (n != NULL);
+ assert (n->tgt_offset == 0);
+ assert ((uintptr_t) hostaddrs[0] == n->host_start);
+ d = (void *) n->tgt->tgt_start;
+ gomp_mutex_unlock (&acc_dev->lock);
}
if (profiling_p)
acc_create (void *h, size_t s)
{
unsigned short kinds[1] = { GOMP_MAP_ALLOC };
- return goacc_enter_data (1, &h, &s, &kinds, acc_async_sync);
+ return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}
void
acc_create_async (void *h, size_t s, int async)
{
unsigned short kinds[1] = { GOMP_MAP_ALLOC };
- goacc_enter_data (1, &h, &s, &kinds, async);
+ goacc_enter_datum (&h, &s, &kinds, async); /* Enters H with size S as a
+ single GOMP_MAP_ALLOC mapping and forwards ASYNC; the device pointer
+ that goacc_enter_datum returns is discarded. */
}
/* acc_present_or_create used to be what acc_create is now. */
acc_copyin (void *h, size_t s)
{
unsigned short kinds[1] = { GOMP_MAP_TO };
- return goacc_enter_data (1, &h, &s, &kinds, acc_async_sync);
+ return goacc_enter_datum (&h, &s, &kinds, acc_async_sync);
}
void
acc_copyin_async (void *h, size_t s, int async)
{
unsigned short kinds[1] = { GOMP_MAP_TO };
- goacc_enter_data (1, &h, &s, &kinds, async);
+ goacc_enter_datum (&h, &s, &kinds, async); /* Enters H with size S as a
+ single GOMP_MAP_TO mapping and forwards ASYNC; the device pointer that
+ goacc_enter_datum returns is discarded. */
}
/* acc_present_or_copyin used to be what acc_copyin is now. */
#endif
-/* Exit a dynamic mapping. */
+/* Exit a dynamic mapping for a single variable. */
static void
-goacc_exit_data (void *h, size_t s, unsigned short kind, int async)
+goacc_exit_datum (void *h, size_t s, unsigned short kind, int async)
{
/* No need to call lazy open, as the data must already have been
mapped. */
(void *) h, (int) s, (void *) n->host_start, (int) host_size);
}
- assert (n->refcount != REFCOUNT_LINK);
- if (n->refcount != REFCOUNT_INFINITY
- && n->refcount < n->dynamic_refcount)
- {
- gomp_mutex_unlock (&acc_dev->lock);
- gomp_fatal ("Dynamic reference counting assert fail\n");
- }
-
bool finalize = (kind == GOMP_MAP_DELETE
|| kind == GOMP_MAP_FORCE_FROM);
if (finalize)
{
if (n->refcount != REFCOUNT_INFINITY)
- n->refcount -= n->dynamic_refcount;
- n->dynamic_refcount = 0;
+ n->refcount -= n->virtual_refcount;
+ n->virtual_refcount = 0;
}
- else if (n->dynamic_refcount)
+
+ if (n->virtual_refcount > 0)
{
if (n->refcount != REFCOUNT_INFINITY)
n->refcount--;
- n->dynamic_refcount--;
+ n->virtual_refcount--;
}
+ else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
+ n->refcount--;
if (n->refcount == 0)
{
void
acc_delete (void *h , size_t s)
{
- goacc_exit_data (h, s, GOMP_MAP_RELEASE, acc_async_sync);
+ goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync); /* Not one of
+ the finalizing kinds tested in goacc_exit_datum, so at most one
+ reference is dropped; acc_async_sync is passed as the async argument. */
}
void
acc_delete_async (void *h , size_t s, int async)
{
- goacc_exit_data (h, s, GOMP_MAP_RELEASE, async);
+ goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async); /* Not one of the
+ finalizing kinds tested in goacc_exit_datum, so at most one reference
+ is dropped; the caller's ASYNC is forwarded. */
}
void
acc_delete_finalize (void *h , size_t s)
{
- goacc_exit_data (h, s, GOMP_MAP_DELETE, acc_async_sync);
+ goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync); /* A finalizing
+ kind: goacc_exit_datum subtracts the whole virtual_refcount before its
+ usual single decrement; acc_async_sync is passed as the async
+ argument. */
}
void
acc_delete_finalize_async (void *h , size_t s, int async)
{
- goacc_exit_data (h, s, GOMP_MAP_DELETE, async);
+ goacc_exit_datum (h, s, GOMP_MAP_DELETE, async); /* A finalizing kind:
+ goacc_exit_datum subtracts the whole virtual_refcount before its usual
+ single decrement; the caller's ASYNC is forwarded. */
}
void
acc_copyout (void *h, size_t s)
{
- goacc_exit_data (h, s, GOMP_MAP_FROM, acc_async_sync);
+ goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync); /* Not a
+ finalizing kind, so at most one reference is dropped. NOTE(review):
+ the device-to-host copy for GOMP_MAP_FROM is presumably done in the
+ part of goacc_exit_datum that is not visible here; confirm. */
}
void
acc_copyout_async (void *h, size_t s, int async)
{
- goacc_exit_data (h, s, GOMP_MAP_FROM, async);
+ goacc_exit_datum (h, s, GOMP_MAP_FROM, async); /* Not a finalizing kind,
+ so at most one reference is dropped; the caller's ASYNC is forwarded.
+ NOTE(review): the device-to-host copy for GOMP_MAP_FROM is presumably
+ done in the part of goacc_exit_datum that is not visible here;
+ confirm. */
}
void
acc_copyout_finalize (void *h, size_t s)
{
- goacc_exit_data (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync);
+ goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync); /* A
+ finalizing kind: goacc_exit_datum subtracts the whole virtual_refcount
+ before its usual single decrement. NOTE(review): the device-to-host
+ copy is presumably done in the part of goacc_exit_datum that is not
+ visible here; confirm. */
}
void
acc_copyout_finalize_async (void *h, size_t s, int async)
{
- goacc_exit_data (h, s, GOMP_MAP_FORCE_FROM, async);
+ goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async); /* A finalizing
+ kind: goacc_exit_datum subtracts the whole virtual_refcount before its
+ usual single decrement; the caller's ASYNC is forwarded.
+ NOTE(review): the device-to-host copy is presumably done in the part
+ of goacc_exit_datum that is not visible here; confirm. */
}
static void
update_dev_host (0, h, s, async);
}
+/* Some types of (pointer) variables use several consecutive mappings, which
+ must be treated as a group for enter/exit data directives. This function
+ returns the last mapping in such a group (inclusive), or POS for singleton
+ mappings.
+
+ POS is the index of the first mapping of the group, MAPNUM the total
+ number of mappings, and KINDS the array of mapping kinds, of which only
+ the low eight bits are inspected.
+
+ A GOMP_MAP_TO_PSET at POS owns every GOMP_MAP_POINTER that immediately
+ follows it, and at least one is required (checked by assert). For any
+ other kind, a directly following GOMP_MAP_ALWAYS_POINTER closes the group
+ at POS + 1; failing that, the group extends over all directly following
+ GOMP_MAP_POINTER entries.
+
+ NOTE(review): POS is an int while MAPNUM is a size_t, so the bounds checks
+ compare in unsigned arithmetic; harmless for a non-negative POS, but
+ confirm that callers never pass a negative index. */
-/* OpenACC 'enter data', 'exit data': 'GOACC_enter_exit_data' and its helper
- functions. */
+static int
+find_group_last (int pos, size_t mapnum, unsigned short *kinds)
+{
+ unsigned char kind0 = kinds[pos] & 0xff;
+ int first_pos = pos, last_pos = pos;
-/* Special handling for 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'.
+ if (kind0 == GOMP_MAP_TO_PSET)
+ {
+ while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
+ last_pos = ++pos;
+ /* We expect at least one GOMP_MAP_POINTER after a GOMP_MAP_TO_PSET. */
+ assert (last_pos > first_pos);
+ }
+ else
+ {
+ /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other
+ mapping. */
+ if (pos + 1 < mapnum
+ && (kinds[pos + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER)
+ return pos + 1;
+
+ /* We can have one or several GOMP_MAP_POINTER mappings after a to/from
+ (etc.) mapping. */
+ while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER)
+ last_pos = ++pos;
+ }
- Only the first mapping is considered in reference counting; the following
- ones implicitly follow suit. Similarly, 'copyout' is done only for the
- first mapping. */
+ return last_pos;
+}
+
+/* Map variables for OpenACC "enter data". We can't just call
+ gomp_map_vars_async once, because individual mapped variables might have
+ "exit data" called for them at different times.
+
+ The MAPNUM entries of HOSTADDRS, SIZES and KINDS are walked group by
+ group: find_group_last gives the index of the last entry belonging to the
+ group that starts at I, and that slice is handed to one
+ gomp_map_vars_async call on ACC_DEV and queue AQ with
+ GOMP_MAP_VARS_OPENACC_ENTER_DATA. Setting I to GROUP_LAST makes the loop
+ increment step to the first entry after the group. The value returned by
+ gomp_map_vars_async is not used here. */
static void
-goacc_remove_pointer (void *h, size_t s, unsigned short kind, int async)
+goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
+ void **hostaddrs, size_t *sizes,
+ unsigned short *kinds, goacc_aq aq)
{
- kind &= 0xff;
-
- struct goacc_thread *thr = goacc_thread ();
- struct gomp_device_descr *acc_dev = thr->dev;
- splay_tree_key n;
- struct target_mem_desc *t;
-
- if (!acc_is_present (h, s))
- return;
-
- gomp_mutex_lock (&acc_dev->lock);
+ for (size_t i = 0; i < mapnum; i++)
+ {
+ int group_last = find_group_last (i, mapnum, kinds);
- n = lookup_host (acc_dev, h, 1);
+ gomp_map_vars_async (acc_dev, aq,
+ (group_last - i) + 1,
+ &hostaddrs[i], NULL,
+ &sizes[i], &kinds[i], true,
+ GOMP_MAP_VARS_OPENACC_ENTER_DATA);
- if (!n)
- {
- gomp_mutex_unlock (&acc_dev->lock);
- gomp_fatal ("%p is not a mapped block", (void *)h);
+ i = group_last;
}
+}
- gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
-
- t = n->tgt;
+/* Unmap variables for OpenACC "exit data".
+
+ ACC_DEV's lock is taken before the loop over the MAPNUM entries and
+ released after it. For each entry the low eight bits of KINDS[I] select
+ the action:
+
+ - GOMP_MAP_FORCE_FROM and GOMP_MAP_DELETE are finalizing: the mapping's
+ virtual_refcount is first subtracted from its refcount (unless that is
+ REFCOUNT_INFINITY) and reset to zero.
+ - GOMP_MAP_FROM, GOMP_MAP_FORCE_FROM and GOMP_MAP_ALWAYS_FROM request a
+ copy back to the host.
+ - GOMP_MAP_TO_PSET, GOMP_MAP_POINTER, GOMP_MAP_DELETE and
+ GOMP_MAP_RELEASE request no copy.
+ - Any other kind is a fatal error.
+
+ The mapping is looked up by host range, using sizeof (void *) instead of
+ SIZES[I] for GOMP_MAP_POINTER; an entry for which no mapping is found is
+ skipped. One reference is then dropped: with a positive virtual_refcount
+ both counters are decremented (refcount only if it is not
+ REFCOUNT_INFINITY), otherwise refcount alone is decremented if it is
+ positive and not REFCOUNT_INFINITY. The copy back happens for
+ GOMP_MAP_FROM only once refcount has reached zero, and unconditionally
+ for the other copying kinds. A mapping whose refcount has reached zero
+ is removed with gomp_remove_var_async on queue AQ.
+
+ NOTE(review): the 'default' case calls gomp_fatal while ACC_DEV's lock is
+ still held, whereas the "cannot unmap target block" path in this file
+ unlocks first; confirm whether that difference is intended. */
- assert (n->refcount != REFCOUNT_INFINITY
- && n->refcount != REFCOUNT_LINK);
- if (n->refcount < n->dynamic_refcount)
- {
- gomp_mutex_unlock (&acc_dev->lock);
- gomp_fatal ("Dynamic reference counting assert fail\n");
- }
+static void
+goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum,
+ void **hostaddrs, size_t *sizes,
+ unsigned short *kinds, goacc_aq aq)
+{
+ gomp_mutex_lock (&acc_dev->lock);
- bool finalize = (kind == GOMP_MAP_DELETE
- || kind == GOMP_MAP_FORCE_FROM);
- if (finalize)
- {
- n->refcount -= n->dynamic_refcount;
- n->dynamic_refcount = 0;
- }
- else if (n->dynamic_refcount)
+ for (size_t i = 0; i < mapnum; ++i)
{
- n->refcount--;
- n->dynamic_refcount--;
- }
+ unsigned char kind = kinds[i] & 0xff;
+ bool copyfrom = false;
+ bool finalize = false;
- if (n->refcount == 0)
- {
- goacc_aq aq = get_goacc_asyncqueue (async);
+ if (kind == GOMP_MAP_FORCE_FROM
+ || kind == GOMP_MAP_DELETE)
+ finalize = true;
- bool copyout = (kind == GOMP_MAP_FROM
- || kind == GOMP_MAP_FORCE_FROM);
- if (copyout)
+ switch (kind)
{
- void *d = (void *) (t->tgt_start + n->tgt_offset
- + (uintptr_t) h - n->host_start);
- gomp_copy_dev2host (acc_dev, aq, h, d, s);
- }
+ case GOMP_MAP_FROM:
+ case GOMP_MAP_FORCE_FROM:
+ case GOMP_MAP_ALWAYS_FROM:
+ copyfrom = true;
+ /* Fallthrough. */
+
+ case GOMP_MAP_TO_PSET:
+ case GOMP_MAP_POINTER:
+ case GOMP_MAP_DELETE:
+ case GOMP_MAP_RELEASE:
+ {
+ struct splay_tree_key_s cur_node;
+ size_t size;
+ if (kind == GOMP_MAP_POINTER)
+ size = sizeof (void *);
+ else
+ size = sizes[i];
+ cur_node.host_start = (uintptr_t) hostaddrs[i];
+ cur_node.host_end = cur_node.host_start + size;
+ splay_tree_key n
+ = splay_tree_lookup (&acc_dev->mem_map, &cur_node);
+
+ if (n == NULL)
+ continue;
+
+ if (finalize)
+ {
+ if (n->refcount != REFCOUNT_INFINITY)
+ n->refcount -= n->virtual_refcount;
+ n->virtual_refcount = 0;
+ }
- if (aq)
- {
- /* TODO The way the following code is currently implemented, we need
- the 'is_tgt_unmapped' return value from 'gomp_remove_var', so
- can't use 'gomp_remove_var_async' here -- see the 'gomp_unref_tgt'
- comment in
- <http://mid.mail-archive.com/878snl36eu.fsf@euler.schwinge.homeip.net>;
- PR92881 -- so have to synchronize here. */
- if (!acc_dev->openacc.async.synchronize_func (aq))
- {
- gomp_mutex_unlock (&acc_dev->lock);
- gomp_fatal ("synchronize failed");
- }
- }
- bool is_tgt_unmapped = false;
- for (size_t i = 0; i < t->list_count; i++)
- {
- is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key);
- if (is_tgt_unmapped)
- break;
+ if (n->virtual_refcount > 0)
+ {
+ if (n->refcount != REFCOUNT_INFINITY)
+ n->refcount--;
+ n->virtual_refcount--;
+ }
+ else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY)
+ n->refcount--;
+
+ if (copyfrom
+ && (kind != GOMP_MAP_FROM || n->refcount == 0))
+ gomp_copy_dev2host (acc_dev, aq, (void *) cur_node.host_start,
+ (void *) (n->tgt->tgt_start + n->tgt_offset
+ + cur_node.host_start
+ - n->host_start),
+ cur_node.host_end - cur_node.host_start);
+
+ if (n->refcount == 0)
+ gomp_remove_var_async (acc_dev, n, aq);
+ }
+ break;
+ default:
+ gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x",
+ kind);
}
- assert (is_tgt_unmapped);
}
gomp_mutex_unlock (&acc_dev->lock);
-
- gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
-}
-
-/* Return the number of mappings associated with 'GOMP_MAP_TO_PSET' or
- 'GOMP_MAP_POINTER'. */
-
-static int
-find_pointer (int pos, size_t mapnum, unsigned short *kinds)
-{
- if (pos + 1 >= mapnum)
- return 0;
-
- unsigned char kind = kinds[pos+1] & 0xff;
-
- if (kind == GOMP_MAP_TO_PSET)
- return 3;
- else if (kind == GOMP_MAP_POINTER)
- return 2;
-
- return 0;
}
void
va_end (ap);
}
- /* In c, non-pointers and arrays are represented by a single data clause.
- Dynamically allocated arrays and subarrays are represented by a data
- clause followed by an internal GOMP_MAP_POINTER.
-
- In fortran, scalars and not allocated arrays are represented by a
- single data clause. Allocated arrays and subarrays have three mappings:
- 1) the original data clause, 2) a PSET 3) a pointer to the array data.
- */
+ goacc_aq aq = get_goacc_asyncqueue (async);
if (data_enter)
- {
- for (i = 0; i < mapnum; i++)
- {
- /* Scan for pointers and PSETs. */
- int pointer = find_pointer (i, mapnum, kinds);
-
- if (!pointer)
- {
- unsigned char kind = kinds[i] & 0xff;
- switch (kind)
- {
- case GOMP_MAP_ALLOC:
- case GOMP_MAP_FORCE_ALLOC:
- case GOMP_MAP_TO:
- case GOMP_MAP_FORCE_TO:
- break;
- default:
- gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
- kind);
- break;
- }
-
- /* We actually have one mapping. */
- pointer = 1;
- }
-
- goacc_enter_data (pointer, &hostaddrs[i], &sizes[i], &kinds[i],
- async);
- /* If applicable, increment 'i' further; OpenACC requires fortran
- arrays to be contiguous, so each PSET is associated with
- one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
- one MAP_POINTER. */
- i += pointer - 1;
- }
- }
+ goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
else
- for (i = 0; i < mapnum; ++i)
- {
- int pointer = find_pointer (i, mapnum, kinds);
-
- if (!pointer)
- {
- unsigned char kind = kinds[i] & 0xff;
- switch (kind)
- {
- case GOMP_MAP_RELEASE:
- case GOMP_MAP_DELETE:
- case GOMP_MAP_FROM:
- case GOMP_MAP_FORCE_FROM:
- break;
- default:
- gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
- kind);
- break;
- }
-
- goacc_exit_data (hostaddrs[i], sizes[i], kinds[i], async);
- }
- else
- {
- goacc_remove_pointer (hostaddrs[i], sizes[i], kinds[i], async);
- /* See the above comment. */
- i += pointer - 1;
- }
- }
+ goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq);
out_prof:
if (profiling_p)