From: Julian Brown Date: Fri, 20 Dec 2019 01:20:16 +0000 (+0000) Subject: OpenACC reference count overhaul X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=378da98fcc907d05002bcd3d6ff7951f0cf485e5;p=gcc.git OpenACC reference count overhaul libgomp/ * libgomp.h (struct splay_tree_key_s): Substitute dynamic_refcount field for virtual_refcount. (enum gomp_map_vars_kind): Add GOMP_MAP_VARS_OPENACC_ENTER_DATA. (gomp_free_memmap): Remove prototype. * oacc-init.c (acc_shutdown_1): Iteratively call gomp_remove_var instead of calling gomp_free_memmap. * oacc-mem.c (acc_map_data): Use virtual_refcount instead of dynamic_refcount. (acc_unmap_data): Open code instead of forcing target_mem_desc's to_free field to NULL then calling gomp_unmap_vars. Handle REFCOUNT_INFINITY on target blocks. (goacc_enter_data): Rename to... (goacc_enter_datum): ...this. Remove MAPNUM parameter and special handling for mapping groups. Use virtual_refcount instead of dynamic_refcount. Use GOMP_MAP_VARS_OPENACC_ENTER_DATA for map_map_vars_async call. Re-do lookup for target pointer return value. (acc_create, acc_create_async, acc_copyin, acc_copyin_async): Call renamed goacc_enter_datum function. (goacc_exit_data): Rename to... (goacc_exit_datum): ...this. Update for virtual_refcount semantics. (acc_delete, acc_delete_async, acc_delete_finalize, acc_delete_finalize_async, acc_copyout, acc_copyout_async, acc_copyout_finalize, acc_copyout_finalize_async): Call renamed goacc_exit_datum function. (gomp_acc_remove_pointer, find_pointer): Remove functions. (find_group_last, goacc_enter_data_internal, goacc_exit_data_internal): New functions. (GOACC_enter_exit_data): Use goacc_enter_data_internal and goacc_exit_data_internal helper functions. * target.c (gomp_map_vars_internal): Handle GOMP_MAP_VARS_OPENACC_ENTER_DATA. Update for virtual_refcount semantics. (gomp_unmap_vars_internal): Update for virtual_refcount semantics. (gomp_load_image_to_device, omp_target_associate_ptr): Zero-initialise virtual_refcount field instead of dynamic_refcount. (gomp_free_memmap): Remove function. * testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c: New test. * testsuite/libgomp.c-c++-common/unmap-infinity-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/pr92843-1.c: Add XFAIL. From-SVN: r279621 --- diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 6382e3634dd..c08d7b0c457 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,45 @@ +2019-12-19 Julian Brown + + * libgomp.h (struct splay_tree_key_s): Substitute dynamic_refcount + field for virtual_refcount. + (enum gomp_map_vars_kind): Add GOMP_MAP_VARS_OPENACC_ENTER_DATA. + (gomp_free_memmap): Remove prototype. + * oacc-init.c (acc_shutdown_1): Iteratively call gomp_remove_var + instead of calling gomp_free_memmap. + * oacc-mem.c (acc_map_data): Use virtual_refcount instead of + dynamic_refcount. + (acc_unmap_data): Open code instead of forcing target_mem_desc's + to_free field to NULL then calling gomp_unmap_vars. Handle + REFCOUNT_INFINITY on target blocks. + (goacc_enter_data): Rename to... + (goacc_enter_datum): ...this. Remove MAPNUM parameter and special + handling for mapping groups. Use virtual_refcount instead of + dynamic_refcount. Use GOMP_MAP_VARS_OPENACC_ENTER_DATA for + map_map_vars_async call. Re-do lookup for target pointer return value. + (acc_create, acc_create_async, acc_copyin, acc_copyin_async): Call + renamed goacc_enter_datum function. + (goacc_exit_data): Rename to... + (goacc_exit_datum): ...this. Update for virtual_refcount semantics. + (acc_delete, acc_delete_async, acc_delete_finalize, + acc_delete_finalize_async, acc_copyout, acc_copyout_async, + acc_copyout_finalize, acc_copyout_finalize_async): Call renamed + goacc_exit_datum function. + (gomp_acc_remove_pointer, find_pointer): Remove functions. + (find_group_last, goacc_enter_data_internal, goacc_exit_data_internal): + New functions. + (GOACC_enter_exit_data): Use goacc_enter_data_internal and + goacc_exit_data_internal helper functions. + * target.c (gomp_map_vars_internal): Handle + GOMP_MAP_VARS_OPENACC_ENTER_DATA. Update for virtual_refcount + semantics. + (gomp_unmap_vars_internal): Update for virtual_refcount semantics. + (gomp_load_image_to_device, omp_target_associate_ptr): Zero-initialise + virtual_refcount field instead of dynamic_refcount. + (gomp_free_memmap): Remove function. + * testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c: New test. + * testsuite/libgomp.c-c++-common/unmap-infinity-2.c: New test. + * testsuite/libgomp.oacc-c-c++-common/pr92843-1.c: Add XFAIL. + 2019-12-19 Julian Brown Thomas Schwinge diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index f982ab38af6..b9301bd70f8 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1007,8 +1007,11 @@ struct splay_tree_key_s { uintptr_t tgt_offset; /* Reference count. */ uintptr_t refcount; - /* Dynamic reference count. */ - uintptr_t dynamic_refcount; + /* Reference counts beyond those that represent genuine references in the + linked splay tree key/target memory structures, e.g. for multiple OpenACC + "present increment" operations (via "acc enter data") referring to the same + host-memory block. */ + uintptr_t virtual_refcount; struct splay_tree_aux *aux; }; @@ -1139,6 +1142,7 @@ struct gomp_device_descr enum gomp_map_vars_kind { GOMP_MAP_VARS_OPENACC, + GOMP_MAP_VARS_OPENACC_ENTER_DATA, GOMP_MAP_VARS_TARGET, GOMP_MAP_VARS_DATA, GOMP_MAP_VARS_ENTER_DATA @@ -1168,7 +1172,6 @@ extern void gomp_unmap_vars_async (struct target_mem_desc *, bool, struct goacc_asyncqueue *); extern void gomp_init_device (struct gomp_device_descr *); extern bool gomp_fini_device (struct gomp_device_descr *); -extern void gomp_free_memmap (struct splay_tree_s *); extern void gomp_unload_device (struct gomp_device_descr *); extern bool gomp_remove_var (struct gomp_device_descr *, splay_tree_key); extern void gomp_remove_var_async (struct gomp_device_descr *, splay_tree_key, diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index a444c604d59..dd88b58a379 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -370,7 +370,15 @@ acc_shutdown_1 (acc_device_t d) if (walk->dev) { gomp_mutex_lock (&walk->dev->lock); - gomp_free_memmap (&walk->dev->mem_map); + + while (walk->dev->mem_map.root) + { + splay_tree_key k = &walk->dev->mem_map.root->key; + if (k->aux) + k->aux->link_key = NULL; + gomp_remove_var (walk->dev, k); + } + gomp_mutex_unlock (&walk->dev->lock); walk->dev = NULL; diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 15eb17b846e..64168d64017 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -407,7 +407,7 @@ acc_map_data (void *h, void *d, size_t s) assert (tgt); splay_tree_key n = tgt->list[0].key; assert (n->refcount == 1); - assert (n->dynamic_refcount == 0); + assert (n->virtual_refcount == 0); /* Special reference counting behavior. */ n->refcount = REFCOUNT_INFINITY; @@ -435,12 +435,9 @@ acc_unmap_data (void *h) acc_api_info api_info; bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); - size_t host_size; - gomp_mutex_lock (&acc_dev->lock); splay_tree_key n = lookup_host (acc_dev, h, 1); - struct target_mem_desc *t; if (!n) { @@ -448,7 +445,7 @@ acc_unmap_data (void *h) gomp_fatal ("%p is not a mapped block", (void *)h); } - host_size = n->host_end - n->host_start; + size_t host_size = n->host_end - n->host_start; if (n->host_start != (uintptr_t) h) { @@ -457,7 +454,7 @@ acc_unmap_data (void *h) (void *) n->host_start, (int) host_size, (void *) h); } /* TODO This currently doesn't catch 'REFCOUNT_INFINITY' usage different from - 'acc_map_data'. Maybe 'dynamic_refcount' can be used for disambiguating + 'acc_map_data'. Maybe 'virtual_refcount' can be used for disambiguating the different 'REFCOUNT_INFINITY' cases, or simply separate 'REFCOUNT_INFINITY' values per different usage ('REFCOUNT_ACC_MAP_DATA' etc.)? */ @@ -469,19 +466,22 @@ acc_unmap_data (void *h) (void *) h, (int) host_size); } - t = n->tgt; + splay_tree_remove (&acc_dev->mem_map, n); + + struct target_mem_desc *tgt = n->tgt; - if (t->refcount == 1) + if (tgt->refcount == REFCOUNT_INFINITY) { - /* This is the last reference, so pull the descriptor off the - chain. This prevents 'gomp_unmap_tgt' via 'gomp_remove_var' from - freeing the device memory. */ - t->tgt_end = 0; - t->to_free = 0; + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("cannot unmap target block"); + } + else if (tgt->refcount > 1) + tgt->refcount--; + else + { + free (tgt->array); + free (tgt); } - - bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); - assert (is_tgt_unmapped); gomp_mutex_unlock (&acc_dev->lock); @@ -493,29 +493,16 @@ acc_unmap_data (void *h) } -/* Enter dynamic mappings. - - The handling for MAPNUM bigger than one is special handling for - 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'. For these, only the first mapping - is considered in reference counting; the following ones implicitly follow - suit. - - If there's just one mapping, return the device pointer. */ +/* Enter dynamic mapping for a single datum. Return the device pointer. */ static void * -goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, - int async) +goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) { void *d; splay_tree_key n; - assert (mapnum > 0); - if (mapnum == 1 - && (!hostaddrs[0] || !sizes[0])) + if (!hostaddrs[0] || !sizes[0]) gomp_fatal ("[%p,+%d] is a bad range", hostaddrs[0], (int) sizes[0]); - else if (mapnum > 1 - && !hostaddrs[0]) - return /* n/a */ (void *) -1; goacc_lazy_initialize (); @@ -523,12 +510,7 @@ goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, struct gomp_device_descr *acc_dev = thr->dev; if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) - { - if (mapnum == 1) - return hostaddrs[0]; - else - return /* n/a */ (void *) -1; - } + return hostaddrs[0]; acc_prof_info prof_info; acc_api_info api_info; @@ -542,7 +524,7 @@ goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, gomp_mutex_lock (&acc_dev->lock); n = lookup_host (acc_dev, hostaddrs[0], sizes[0]); - if (n && mapnum == 1) + if (n) { void *h = hostaddrs[0]; size_t s = sizes[0]; @@ -558,53 +540,31 @@ goacc_enter_data (size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, assert (n->refcount != REFCOUNT_LINK); if (n->refcount != REFCOUNT_INFINITY) - n->refcount++; - n->dynamic_refcount++; - - gomp_mutex_unlock (&acc_dev->lock); - } - else if (n && mapnum > 1) - { - d = /* n/a */ (void *) -1; - - assert (n->refcount != REFCOUNT_INFINITY - && n->refcount != REFCOUNT_LINK); - - bool processed = false; - - struct target_mem_desc *tgt = n->tgt; - for (size_t i = 0; i < tgt->list_count; i++) - if (tgt->list[i].key == n) - { - for (size_t j = 0; j < mapnum; j++) - if (i + j < tgt->list_count && tgt->list[i + j].key) - { - tgt->list[i + j].key->refcount++; - tgt->list[i + j].key->dynamic_refcount++; - } - processed = true; - } + { + n->refcount++; + n->virtual_refcount++; + } gomp_mutex_unlock (&acc_dev->lock); - if (!processed) - gomp_fatal ("dynamic refcount incrementing failed for pointer/pset"); } else { + const size_t mapnum = 1; + gomp_mutex_unlock (&acc_dev->lock); goacc_aq aq = get_goacc_asyncqueue (async); - struct target_mem_desc *tgt - = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, - kinds, true, GOMP_MAP_VARS_ENTER_DATA); - assert (tgt); - n = tgt->list[0].key; - assert (n->refcount == 1); - assert (n->dynamic_refcount == 0); - n->dynamic_refcount++; + gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, + true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); - d = tgt->to_free; + gomp_mutex_lock (&acc_dev->lock); + n = lookup_host (acc_dev, hostaddrs[0], sizes[0]); + assert (n != NULL); + assert (n->tgt_offset == 0); + assert ((uintptr_t) hostaddrs[0] == n->host_start); + d = (void *) n->tgt->tgt_start; + gomp_mutex_unlock (&acc_dev->lock); } if (profiling_p) @@ -620,14 +580,14 @@ void * acc_create (void *h, size_t s) { unsigned short kinds[1] = { GOMP_MAP_ALLOC }; - return goacc_enter_data (1, &h, &s, &kinds, acc_async_sync); + return goacc_enter_datum (&h, &s, &kinds, acc_async_sync); } void acc_create_async (void *h, size_t s, int async) { unsigned short kinds[1] = { GOMP_MAP_ALLOC }; - goacc_enter_data (1, &h, &s, &kinds, async); + goacc_enter_datum (&h, &s, &kinds, async); } /* acc_present_or_create used to be what acc_create is now. */ @@ -653,14 +613,14 @@ void * acc_copyin (void *h, size_t s) { unsigned short kinds[1] = { GOMP_MAP_TO }; - return goacc_enter_data (1, &h, &s, &kinds, acc_async_sync); + return goacc_enter_datum (&h, &s, &kinds, acc_async_sync); } void acc_copyin_async (void *h, size_t s, int async) { unsigned short kinds[1] = { GOMP_MAP_TO }; - goacc_enter_data (1, &h, &s, &kinds, async); + goacc_enter_datum (&h, &s, &kinds, async); } /* acc_present_or_copyin used to be what acc_copyin is now. */ @@ -683,10 +643,10 @@ acc_pcopyin (void *h, size_t s) #endif -/* Exit a dynamic mapping. */ +/* Exit a dynamic mapping for a single variable. */ static void -goacc_exit_data (void *h, size_t s, unsigned short kind, int async) +goacc_exit_datum (void *h, size_t s, unsigned short kind, int async) { /* No need to call lazy open, as the data must already have been mapped. */ @@ -723,28 +683,23 @@ goacc_exit_data (void *h, size_t s, unsigned short kind, int async) (void *) h, (int) s, (void *) n->host_start, (int) host_size); } - assert (n->refcount != REFCOUNT_LINK); - if (n->refcount != REFCOUNT_INFINITY - && n->refcount < n->dynamic_refcount) - { - gomp_mutex_unlock (&acc_dev->lock); - gomp_fatal ("Dynamic reference counting assert fail\n"); - } - bool finalize = (kind == GOMP_MAP_DELETE || kind == GOMP_MAP_FORCE_FROM); if (finalize) { if (n->refcount != REFCOUNT_INFINITY) - n->refcount -= n->dynamic_refcount; - n->dynamic_refcount = 0; + n->refcount -= n->virtual_refcount; + n->virtual_refcount = 0; } - else if (n->dynamic_refcount) + + if (n->virtual_refcount > 0) { if (n->refcount != REFCOUNT_INFINITY) n->refcount--; - n->dynamic_refcount--; + n->virtual_refcount--; } + else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) + n->refcount--; if (n->refcount == 0) { @@ -785,49 +740,49 @@ goacc_exit_data (void *h, size_t s, unsigned short kind, int async) void acc_delete (void *h , size_t s) { - goacc_exit_data (h, s, GOMP_MAP_RELEASE, acc_async_sync); + goacc_exit_datum (h, s, GOMP_MAP_RELEASE, acc_async_sync); } void acc_delete_async (void *h , size_t s, int async) { - goacc_exit_data (h, s, GOMP_MAP_RELEASE, async); + goacc_exit_datum (h, s, GOMP_MAP_RELEASE, async); } void acc_delete_finalize (void *h , size_t s) { - goacc_exit_data (h, s, GOMP_MAP_DELETE, acc_async_sync); + goacc_exit_datum (h, s, GOMP_MAP_DELETE, acc_async_sync); } void acc_delete_finalize_async (void *h , size_t s, int async) { - goacc_exit_data (h, s, GOMP_MAP_DELETE, async); + goacc_exit_datum (h, s, GOMP_MAP_DELETE, async); } void acc_copyout (void *h, size_t s) { - goacc_exit_data (h, s, GOMP_MAP_FROM, acc_async_sync); + goacc_exit_datum (h, s, GOMP_MAP_FROM, acc_async_sync); } void acc_copyout_async (void *h, size_t s, int async) { - goacc_exit_data (h, s, GOMP_MAP_FROM, async); + goacc_exit_datum (h, s, GOMP_MAP_FROM, async); } void acc_copyout_finalize (void *h, size_t s) { - goacc_exit_data (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync); + goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, acc_async_sync); } void acc_copyout_finalize_async (void *h, size_t s, int async) { - goacc_exit_data (h, s, GOMP_MAP_FORCE_FROM, async); + goacc_exit_datum (h, s, GOMP_MAP_FORCE_FROM, async); } static void @@ -912,123 +867,145 @@ acc_update_self_async (void *h, size_t s, int async) update_dev_host (0, h, s, async); } +/* Some types of (pointer) variables use several consecutive mappings, which + must be treated as a group for enter/exit data directives. This function + returns the last mapping in such a group (inclusive), or POS for singleton + mappings. */ -/* OpenACC 'enter data', 'exit data': 'GOACC_enter_exit_data' and its helper - functions. */ +static int +find_group_last (int pos, size_t mapnum, unsigned short *kinds) +{ + unsigned char kind0 = kinds[pos] & 0xff; + int first_pos = pos, last_pos = pos; -/* Special handling for 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'. + if (kind0 == GOMP_MAP_TO_PSET) + { + while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER) + last_pos = ++pos; + /* We expect at least one GOMP_MAP_POINTER after a GOMP_MAP_TO_PSET. */ + assert (last_pos > first_pos); + } + else + { + /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other + mapping. */ + if (pos + 1 < mapnum + && (kinds[pos + 1] & 0xff) == GOMP_MAP_ALWAYS_POINTER) + return pos + 1; + + /* We can have one or several GOMP_MAP_POINTER mappings after a to/from + (etc.) mapping. */ + while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER) + last_pos = ++pos; + } - Only the first mapping is considered in reference counting; the following - ones implicitly follow suit. Similarly, 'copyout' is done only for the - first mapping. */ + return last_pos; +} + +/* Map variables for OpenACC "enter data". We can't just call + gomp_map_vars_async once, because individual mapped variables might have + "exit data" called for them at different times. */ static void -goacc_remove_pointer (void *h, size_t s, unsigned short kind, int async) +goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, + void **hostaddrs, size_t *sizes, + unsigned short *kinds, goacc_aq aq) { - kind &= 0xff; - - struct goacc_thread *thr = goacc_thread (); - struct gomp_device_descr *acc_dev = thr->dev; - splay_tree_key n; - struct target_mem_desc *t; - - if (!acc_is_present (h, s)) - return; - - gomp_mutex_lock (&acc_dev->lock); + for (size_t i = 0; i < mapnum; i++) + { + int group_last = find_group_last (i, mapnum, kinds); - n = lookup_host (acc_dev, h, 1); + gomp_map_vars_async (acc_dev, aq, + (group_last - i) + 1, + &hostaddrs[i], NULL, + &sizes[i], &kinds[i], true, + GOMP_MAP_VARS_OPENACC_ENTER_DATA); - if (!n) - { - gomp_mutex_unlock (&acc_dev->lock); - gomp_fatal ("%p is not a mapped block", (void *)h); + i = group_last; } +} - gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); - - t = n->tgt; +/* Unmap variables for OpenACC "exit data". */ - assert (n->refcount != REFCOUNT_INFINITY - && n->refcount != REFCOUNT_LINK); - if (n->refcount < n->dynamic_refcount) - { - gomp_mutex_unlock (&acc_dev->lock); - gomp_fatal ("Dynamic reference counting assert fail\n"); - } +static void +goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, + void **hostaddrs, size_t *sizes, + unsigned short *kinds, goacc_aq aq) +{ + gomp_mutex_lock (&acc_dev->lock); - bool finalize = (kind == GOMP_MAP_DELETE - || kind == GOMP_MAP_FORCE_FROM); - if (finalize) - { - n->refcount -= n->dynamic_refcount; - n->dynamic_refcount = 0; - } - else if (n->dynamic_refcount) + for (size_t i = 0; i < mapnum; ++i) { - n->refcount--; - n->dynamic_refcount--; - } + unsigned char kind = kinds[i] & 0xff; + bool copyfrom = false; + bool finalize = false; - if (n->refcount == 0) - { - goacc_aq aq = get_goacc_asyncqueue (async); + if (kind == GOMP_MAP_FORCE_FROM + || kind == GOMP_MAP_DELETE) + finalize = true; - bool copyout = (kind == GOMP_MAP_FROM - || kind == GOMP_MAP_FORCE_FROM); - if (copyout) + switch (kind) { - void *d = (void *) (t->tgt_start + n->tgt_offset - + (uintptr_t) h - n->host_start); - gomp_copy_dev2host (acc_dev, aq, h, d, s); - } + case GOMP_MAP_FROM: + case GOMP_MAP_FORCE_FROM: + case GOMP_MAP_ALWAYS_FROM: + copyfrom = true; + /* Fallthrough. */ + + case GOMP_MAP_TO_PSET: + case GOMP_MAP_POINTER: + case GOMP_MAP_DELETE: + case GOMP_MAP_RELEASE: + { + struct splay_tree_key_s cur_node; + size_t size; + if (kind == GOMP_MAP_POINTER) + size = sizeof (void *); + else + size = sizes[i]; + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + size; + splay_tree_key n + = splay_tree_lookup (&acc_dev->mem_map, &cur_node); + + if (n == NULL) + continue; + + if (finalize) + { + if (n->refcount != REFCOUNT_INFINITY) + n->refcount -= n->virtual_refcount; + n->virtual_refcount = 0; + } - if (aq) - { - /* TODO The way the following code is currently implemented, we need - the 'is_tgt_unmapped' return value from 'gomp_remove_var', so - can't use 'gomp_remove_var_async' here -- see the 'gomp_unref_tgt' - comment in - ; - PR92881 -- so have to synchronize here. */ - if (!acc_dev->openacc.async.synchronize_func (aq)) - { - gomp_mutex_unlock (&acc_dev->lock); - gomp_fatal ("synchronize failed"); - } - } - bool is_tgt_unmapped = false; - for (size_t i = 0; i < t->list_count; i++) - { - is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key); - if (is_tgt_unmapped) - break; + if (n->virtual_refcount > 0) + { + if (n->refcount != REFCOUNT_INFINITY) + n->refcount--; + n->virtual_refcount--; + } + else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) + n->refcount--; + + if (copyfrom + && (kind != GOMP_MAP_FROM || n->refcount == 0)) + gomp_copy_dev2host (acc_dev, aq, (void *) cur_node.host_start, + (void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start + - n->host_start), + cur_node.host_end - cur_node.host_start); + + if (n->refcount == 0) + gomp_remove_var_async (acc_dev, n, aq); + } + break; + default: + gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x", + kind); } - assert (is_tgt_unmapped); } gomp_mutex_unlock (&acc_dev->lock); - - gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); -} - -/* Return the number of mappings associated with 'GOMP_MAP_TO_PSET' or - 'GOMP_MAP_POINTER'. */ - -static int -find_pointer (int pos, size_t mapnum, unsigned short *kinds) -{ - if (pos + 1 >= mapnum) - return 0; - - unsigned char kind = kinds[pos+1] & 0xff; - - if (kind == GOMP_MAP_TO_PSET) - return 3; - else if (kind == GOMP_MAP_POINTER) - return 2; - - return 0; } void @@ -1147,81 +1124,12 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, va_end (ap); } - /* In c, non-pointers and arrays are represented by a single data clause. - Dynamically allocated arrays and subarrays are represented by a data - clause followed by an internal GOMP_MAP_POINTER. - - In fortran, scalars and not allocated arrays are represented by a - single data clause. Allocated arrays and subarrays have three mappings: - 1) the original data clause, 2) a PSET 3) a pointer to the array data. - */ + goacc_aq aq = get_goacc_asyncqueue (async); if (data_enter) - { - for (i = 0; i < mapnum; i++) - { - /* Scan for pointers and PSETs. */ - int pointer = find_pointer (i, mapnum, kinds); - - if (!pointer) - { - unsigned char kind = kinds[i] & 0xff; - switch (kind) - { - case GOMP_MAP_ALLOC: - case GOMP_MAP_FORCE_ALLOC: - case GOMP_MAP_TO: - case GOMP_MAP_FORCE_TO: - break; - default: - gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", - kind); - break; - } - - /* We actually have one mapping. */ - pointer = 1; - } - - goacc_enter_data (pointer, &hostaddrs[i], &sizes[i], &kinds[i], - async); - /* If applicable, increment 'i' further; OpenACC requires fortran - arrays to be contiguous, so each PSET is associated with - one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and - one MAP_POINTER. */ - i += pointer - 1; - } - } + goacc_enter_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq); else - for (i = 0; i < mapnum; ++i) - { - int pointer = find_pointer (i, mapnum, kinds); - - if (!pointer) - { - unsigned char kind = kinds[i] & 0xff; - switch (kind) - { - case GOMP_MAP_RELEASE: - case GOMP_MAP_DELETE: - case GOMP_MAP_FROM: - case GOMP_MAP_FORCE_FROM: - break; - default: - gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x", - kind); - break; - } - - goacc_exit_data (hostaddrs[i], sizes[i], kinds[i], async); - } - else - { - goacc_remove_pointer (hostaddrs[i], sizes[i], kinds[i], async); - /* See the above comment. */ - i += pointer - 1; - } - } + goacc_exit_data_internal (acc_dev, mapnum, hostaddrs, sizes, kinds, aq); out_prof: if (profiling_p) diff --git a/libgomp/target.c b/libgomp/target.c index 5df0a0ff254..f2a60614e59 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -537,8 +537,10 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; - tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; + tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA + || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1; tgt->device_descr = devicep; + tgt->prev = NULL; struct gomp_coalesce_buf cbuf, *cbufp = NULL; if (mapnum == 0) @@ -963,7 +965,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->list[i].offset = 0; tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; - k->dynamic_refcount = 0; + k->virtual_refcount = 0; tgt->refcount++; array->left = NULL; array->right = NULL; @@ -1101,8 +1103,20 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, /* If the variable from "omp target enter data" map-list was already mapped, tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or gomp_exit_data. */ - if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) - { + if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA + || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) + && tgt->refcount == 0) + { + /* If we're about to discard a target_mem_desc with no "structural" + references (tgt->refcount == 0), any splay keys linked in the tgt's + list must have their virtual refcount incremented to represent that + "lost" reference in order to implement the semantics of the OpenACC + "present increment" operation properly. */ + if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) + for (i = 0; i < tgt->list_count; i++) + if (tgt->list[i].key) + tgt->list[i].key->virtual_refcount++; + free (tgt); tgt = NULL; } @@ -1240,7 +1254,14 @@ gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, continue; bool do_unmap = false; - if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) + if (k->tgt == tgt + && k->virtual_refcount > 0 + && k->refcount != REFCOUNT_INFINITY) + { + k->virtual_refcount--; + k->refcount--; + } + else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) k->refcount--; else if (k->refcount == 1) { @@ -1405,7 +1426,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_table[i].start; k->refcount = REFCOUNT_INFINITY; - k->dynamic_refcount = 0; + k->virtual_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; @@ -1438,7 +1459,7 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_var->start; k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY; - k->dynamic_refcount = 0; + k->virtual_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; @@ -1673,22 +1694,6 @@ gomp_unload_device (struct gomp_device_descr *devicep) } } -/* Free address mapping tables. MM must be locked on entry, and remains locked - on return. */ - -attribute_hidden void -gomp_free_memmap (struct splay_tree_s *mem_map) -{ - while (mem_map->root) - { - struct target_mem_desc *tgt = mem_map->root->key.tgt; - - splay_tree_remove (mem_map, &mem_map->root->key); - free (tgt->array); - free (tgt); - } -} - /* Host fallback for GOMP_target{,_ext} routines. */ static void @@ -2700,7 +2705,7 @@ omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, k->tgt = tgt; k->tgt_offset = (uintptr_t) device_ptr + device_offset; k->refcount = REFCOUNT_INFINITY; - k->dynamic_refcount = 0; + k->virtual_refcount = 0; k->aux = NULL; array->left = NULL; array->right = NULL; diff --git a/libgomp/testsuite/libgomp.c-c++-common/unmap-infinity-2.c b/libgomp/testsuite/libgomp.c-c++-common/unmap-infinity-2.c new file mode 100644 index 00000000000..3931c5aba25 --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/unmap-infinity-2.c @@ -0,0 +1,19 @@ +int foo[16]; +#pragma omp declare target (foo) + +__attribute__((used)) void bar (void) +{ + #pragma omp target parallel for + for (int i = 0; i < 16; i++) + foo[i] = i; +} + +int +main (int argc, char *argv[]) +{ + int *foo_copy = foo; + /* Try to trigger the unmapping of a REFCOUNT_INFINITY target block. This + does nothing at the time of writing. */ + #pragma omp target exit data map(delete: foo_copy[0:16]) + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c index db5b35b08d9..f16c46a37bf 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92843-1.c @@ -1,6 +1,7 @@ /* Verify that 'acc_copyout' etc. is a no-op if there's still a structured reference count. */ +/* { dg-xfail-run-if "TODO PR92843" { *-*-* } } */ /* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ #include diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c new file mode 100644 index 00000000000..872f0c1de5c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/unmap-infinity-1.c @@ -0,0 +1,17 @@ +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ + +#include + +int foo[16]; +#pragma acc declare device_resident(foo) + +int +main (int argc, char *argv[]) +{ + acc_init (acc_device_default); + acc_unmap_data ((void *) foo); +/* { dg-output "libgomp: cannot unmap target block" } */ + return 0; +} + +/* { dg-shouldfail "" } */