From: Thomas Schwinge Date: Wed, 18 Dec 2019 17:00:39 +0000 (+0100) Subject: [PR92848] [OpenACC] Use 'GOMP_MAP_VARS_ENTER_DATA' for dynamic data lifetimes X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ba40277f6ac96ceb982120ce95d2b64695a25dff;p=gcc.git [PR92848] [OpenACC] Use 'GOMP_MAP_VARS_ENTER_DATA' for dynamic data lifetimes libgomp/ PR libgomp/92848 * oacc-mem.c (acc_map_data, present_create_copy) (goacc_insert_pointer): Use 'GOMP_MAP_VARS_ENTER_DATA'. (acc_unmap_data, delete_copyout, goacc_remove_pointer): Adjust. * testsuite/libgomp.oacc-c-c++-common/lib-50.c: Remove. * testsuite/libgomp.oacc-c-c++-common/pr92848-1-d-a.c: New file * testsuite/libgomp.oacc-c-c++-common/pr92848-1-d-p.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/pr92848-1-r-a.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/pr92848-1-r-p.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/subset-subarray-mappings-1-r-p.c: Remove "XFAIL"s. From-SVN: r279530 --- diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 5bd1c648ffe..d9aba5bee18 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,5 +1,17 @@ 2019-12-18 Thomas Schwinge + PR libgomp/92848 + * oacc-mem.c (acc_map_data, present_create_copy) + (goacc_insert_pointer): Use 'GOMP_MAP_VARS_ENTER_DATA'. + (acc_unmap_data, delete_copyout, goacc_remove_pointer): Adjust. + * testsuite/libgomp.oacc-c-c++-common/lib-50.c: Remove. + * testsuite/libgomp.oacc-c-c++-common/pr92848-1-d-a.c: New file + * testsuite/libgomp.oacc-c-c++-common/pr92848-1-d-p.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/pr92848-1-r-a.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/pr92848-1-r-p.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/subset-subarray-mappings-1-r-p.c: + Remove "XFAIL"s. + * target.c (gomp_unmap_tgt): Make it 'static'. * libgomp.h (gomp_unmap_tgt): Remove. diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 196b7e2a520..54427982341 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -403,7 +403,8 @@ acc_map_data (void *h, void *d, size_t s) gomp_mutex_unlock (&acc_dev->lock); tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, - &kinds, true, GOMP_MAP_VARS_OPENACC); + &kinds, true, GOMP_MAP_VARS_ENTER_DATA); + assert (tgt); splay_tree_key n = tgt->list[0].key; assert (n->refcount == 1); assert (n->dynamic_refcount == 0); @@ -468,23 +469,21 @@ acc_unmap_data (void *h) (void *) h, (int) host_size); } - /* Mark for removal. */ - n->refcount = 1; - t = n->tgt; - if (t->refcount == 2) + if (t->refcount == 1) { /* This is the last reference, so pull the descriptor off the - chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from + chain. This prevents 'gomp_unmap_tgt' via 'gomp_remove_var' from freeing the device memory. */ t->tgt_end = 0; t->to_free = 0; } - gomp_mutex_unlock (&acc_dev->lock); + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); + assert (is_tgt_unmapped); - gomp_unmap_vars (t, true); + gomp_mutex_unlock (&acc_dev->lock); if (profiling_p) { @@ -572,7 +571,8 @@ present_create_copy (unsigned f, void *h, size_t s, int async) goacc_aq aq = get_goacc_asyncqueue (async); tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s, - &kinds, true, GOMP_MAP_VARS_OPENACC); + &kinds, true, GOMP_MAP_VARS_ENTER_DATA); + assert (tgt); n = tgt->list[0].key; assert (n->refcount == 1); assert (n->dynamic_refcount == 0); @@ -727,7 +727,18 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) + (uintptr_t) h - n->host_start); gomp_copy_dev2host (acc_dev, aq, h, d, s); } - gomp_remove_var_async (acc_dev, n, aq); + + if (aq) + /* TODO We can't do the 'is_tgt_unmapped' checking -- see the + 'gomp_unref_tgt' comment in + ; + PR92881. */ + gomp_remove_var_async (acc_dev, n, aq); + else + { + bool is_tgt_unmapped = gomp_remove_var (acc_dev, n); + assert (is_tgt_unmapped); + } } gomp_mutex_unlock (&acc_dev->lock); @@ -877,7 +888,8 @@ acc_update_self_async (void *h, size_t s, int async) /* Special handling for 'GOMP_MAP_POINTER', 'GOMP_MAP_TO_PSET'. Only the first mapping is considered in reference counting; the following - ones implicitly follow suit. */ + ones implicitly follow suit. Similarly, 'copyout' ('force_copyfrom') is + done only for the first mapping. */ static void goacc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, @@ -918,7 +930,8 @@ goacc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); goacc_aq aq = get_goacc_asyncqueue (async); tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, - NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); + NULL, sizes, kinds, true, GOMP_MAP_VARS_ENTER_DATA); + assert (tgt); splay_tree_key n = tgt->list[0].key; assert (n->refcount == 1); assert (n->dynamic_refcount == 0); @@ -928,13 +941,12 @@ goacc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, static void goacc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, - int finalize, int mapnum) + int finalize) { struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; splay_tree_key n; struct target_mem_desc *t; - int minrefs = (mapnum == 1) ? 2 : 3; if (!acc_is_present (h, s)) return; @@ -972,28 +984,40 @@ goacc_remove_pointer (void *h, size_t s, bool force_copyfrom, int async, n->dynamic_refcount--; } - gomp_mutex_unlock (&acc_dev->lock); - if (n->refcount == 0) { - /* Set refcount to 1 to allow gomp_unmap_vars to unmap it. */ - n->refcount = 1; - t->refcount = minrefs; - for (size_t i = 0; i < t->list_count; i++) - if (t->list[i].key == n) - { - t->list[i].copy_from = force_copyfrom ? 1 : 0; - break; - } + goacc_aq aq = get_goacc_asyncqueue (async); - /* If running synchronously, unmap immediately. */ - if (async < acc_async_noval) - gomp_unmap_vars (t, true); - else + if (force_copyfrom) + { + void *d = (void *) (t->tgt_start + n->tgt_offset + + (uintptr_t) h - n->host_start); + + gomp_copy_dev2host (acc_dev, aq, h, d, s); + } + + if (aq) { - goacc_aq aq = get_goacc_asyncqueue (async); - gomp_unmap_vars_async (t, true, aq); + /* TODO The way the following code is currently implemented, we need + the 'is_tgt_unmapped' return value from 'gomp_remove_var', so + can't use 'gomp_remove_var_async' here -- see the 'gomp_unref_tgt' + comment in + ; + PR92881 -- so have to synchronize here. */ + if (!acc_dev->openacc.async.synchronize_func (aq)) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_fatal ("synchronize failed"); + } + } + bool is_tgt_unmapped = false; + for (size_t i = 0; i < t->list_count; i++) + { + is_tgt_unmapped = gomp_remove_var (acc_dev, t->list[i].key); + if (is_tgt_unmapped) + break; } + assert (is_tgt_unmapped); } gomp_mutex_unlock (&acc_dev->lock); @@ -1234,7 +1258,7 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, void **hostaddrs, bool copyfrom = (kind == GOMP_MAP_FORCE_FROM || kind == GOMP_MAP_FROM); goacc_remove_pointer (hostaddrs[i], sizes[i], copyfrom, async, - finalize, pointer); + finalize); /* See the above comment. */ i += pointer - 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c deleted file mode 100644 index e8294e1af36..00000000000 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-50.c +++ /dev/null @@ -1,30 +0,0 @@ -/* { dg-do run } */ -/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ - -#include -#include - -int -main (int argc, char **argv) -{ - const int N = 256; - unsigned char *h; - void *d; - - h = (unsigned char *) malloc (N); - - d = acc_malloc (N); - - acc_map_data (h, d, N); - - if (acc_is_present (h, N) != 1) - abort (); - - acc_unmap_data (h); - - acc_free (d); - - free (h); - - return 0; -} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-d-a.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-d-a.c new file mode 100644 index 00000000000..6fe6a9af8c2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-d-a.c @@ -0,0 +1,7 @@ +/* Verify device memory allocation/deallocation + { dg-additional-options "-DOPENACC_DIRECTIVES" } using OpenACC directives, + { dg-additional-options "-DARRAYS" } using arrays. */ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include "pr92848-1-r-p.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-d-p.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-d-p.c new file mode 100644 index 00000000000..2228b2d5d60 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-d-p.c @@ -0,0 +1,7 @@ +/* Verify device memory allocation/deallocation + { dg-additional-options "-DOPENACC_DIRECTIVES" } using OpenACC directives, + { dg-additional-options "-DPOINTERS" } using pointers. */ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include "pr92848-1-r-p.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-r-a.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-r-a.c new file mode 100644 index 00000000000..3f5f0acbcf0 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-r-a.c @@ -0,0 +1,7 @@ +/* Verify device memory allocation/deallocation + { dg-additional-options "-DOPENACC_RUNTIME" } using OpenACC Runtime Library routines, + { dg-additional-options "-DARRAYS" } using arrays. */ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#include "pr92848-1-r-p.c" diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-r-p.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-r-p.c new file mode 100644 index 00000000000..95565ba1cb2 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr92848-1-r-p.c @@ -0,0 +1,321 @@ +/* Verify device memory allocation/deallocation + { dg-additional-options "-DOPENACC_RUNTIME" } using OpenACC Runtime Library routines, + { dg-additional-options "-DPOINTERS" } using pointers. */ + +/* { dg-skip-if "" { *-*-* } { "-DACC_MEM_SHARED=1" } } */ + +#if OPENACC_RUNTIME +#elif OPENACC_DIRECTIVES +#else +# error +#endif + +#if POINTERS +#elif ARRAYS +#else +# error +#endif + + +#include +#include +#include +#include +#include +#include +#include + + +static bool cb_ev_alloc_expected; +static size_t cb_ev_alloc_bytes; +static const void *cb_ev_alloc_device_ptr; +static void +cb_ev_alloc (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) +{ + assert (cb_ev_alloc_expected); + cb_ev_alloc_expected = false; + + cb_ev_alloc_bytes = event_info->data_event.bytes; + cb_ev_alloc_device_ptr = event_info->data_event.device_ptr; +} + +static bool cb_ev_free_expected; +static const void *cb_ev_free_device_ptr; +static void +cb_ev_free (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) +{ + assert (cb_ev_free_expected); + cb_ev_free_expected = false; + + cb_ev_free_device_ptr = event_info->data_event.device_ptr; +} + + +/* Match the alignment processing that + 'libgomp/target.c:gomp_map_vars_internal' is doing; simplified, not + considering special alignment requirements of certain data types. */ + +static size_t +aligned_size (size_t tgt_size) +{ + size_t tgt_align = sizeof (void *); + return tgt_size + tgt_align - 1; +} + +static const void * +aligned_address (const void *tgt_start) +{ + size_t tgt_align = sizeof (void *); + return (void *) (((uintptr_t) tgt_start + tgt_align - 1) & ~(tgt_align - 1)); +} + + +#define SIZE 1024 +#define SUBSET 32 + + +/* A "create", [...], "delete" sequence. */ + +static void +f1 (void) +{ + cb_ev_alloc_expected = false; + cb_ev_free_expected = false; + acc_prof_register (acc_ev_alloc, cb_ev_alloc, acc_reg); + acc_prof_register (acc_ev_free, cb_ev_free, acc_reg); + +#if POINTERS + char *h = (char *) malloc (SIZE); +#else + char h[SIZE]; +#endif + + void *d; + cb_ev_alloc_expected = true; +#if OPENACC_RUNTIME + d = acc_create (h, SIZE); +#else +# if POINTERS +# pragma acc enter data create (h[0:SIZE]) +# else +# pragma acc enter data create (h) +# endif + d = acc_deviceptr (h); +#endif + assert (d); + assert (!cb_ev_alloc_expected); + assert (cb_ev_alloc_bytes == aligned_size (SIZE)); + assert (aligned_address (cb_ev_alloc_device_ptr) == d); + assert (acc_is_present (h, SIZE)); + +#if OPENACC_RUNTIME + acc_create (h, SIZE); +#else +# if POINTERS +# pragma acc enter data create (h[0:SIZE]) +# else +# pragma acc enter data create (h) +# endif +#endif + +#if POINTERS +# pragma acc data create (h[0:SIZE]) + ; +#else +# pragma acc data create (h) + ; +#endif + assert (acc_is_present (h, SIZE)); + +#if OPENACC_RUNTIME + acc_delete (h, SIZE); +#else +# if POINTERS +# pragma acc exit data delete (h[0:SIZE]) +# else +# pragma acc exit data delete (h) +# endif +#endif + assert (acc_is_present (h, SIZE)); + + cb_ev_free_expected = true; +#if OPENACC_RUNTIME + acc_delete (h, SIZE); +#else +# if POINTERS +# pragma acc exit data delete (h[0:SIZE]) +# else +# pragma acc exit data delete (h) +# endif +#endif + assert (!cb_ev_free_expected); + assert (cb_ev_free_device_ptr == cb_ev_alloc_device_ptr); + assert (!acc_is_present (h, SIZE)); + +#if POINTERS + free (h); +#endif + + acc_prof_unregister (acc_ev_alloc, cb_ev_alloc, acc_reg); + acc_prof_unregister (acc_ev_free, cb_ev_free, acc_reg); +} + + +/* A "map", [...] "unmap" sequence. */ + +static void +f2 (void) +{ + cb_ev_alloc_expected = false; + cb_ev_free_expected = false; + acc_prof_register (acc_ev_alloc, cb_ev_alloc, acc_reg); + acc_prof_register (acc_ev_free, cb_ev_free, acc_reg); + +#if POINTERS + char *h = (char *) malloc (SIZE); +#else + char h[SIZE]; +#endif + + void *d; + cb_ev_alloc_expected = true; + d = acc_malloc (SIZE); + assert (d); + assert (!cb_ev_alloc_expected); + assert (cb_ev_alloc_bytes == SIZE); + assert (cb_ev_alloc_device_ptr == d); + + acc_map_data (h, d, SIZE); + assert (acc_is_present (h, SIZE)); + +#if OPENACC_RUNTIME + acc_create (h, SIZE); +#else +# if POINTERS +# pragma acc enter data create (h[0:SIZE]) +# else +# pragma acc enter data create (h) +# endif +#endif + +#if POINTERS +# pragma acc data create (h[0:SIZE]) + ; +#else +# pragma acc data create (h) + ; +#endif + assert (acc_is_present (h, SIZE)); + +#if OPENACC_RUNTIME + acc_delete (h, SIZE); +#else +# if POINTERS +# pragma acc exit data delete (h[0:SIZE]) +# else +# pragma acc exit data delete (h) +# endif +#endif + assert (acc_is_present (h, SIZE)); + + acc_unmap_data (h); + assert (!acc_is_present (h, SIZE)); + + cb_ev_free_expected = true; + acc_free (d); + assert (!cb_ev_free_expected); + assert (cb_ev_free_device_ptr == cb_ev_alloc_device_ptr); + +#if POINTERS + free (h); +#endif + + acc_prof_unregister (acc_ev_alloc, cb_ev_alloc, acc_reg); + acc_prof_unregister (acc_ev_free, cb_ev_free, acc_reg); +} + + +/* A structured 'data' construct. */ + +static void +f3 (void) +{ + cb_ev_alloc_expected = false; + cb_ev_free_expected = false; + acc_prof_register (acc_ev_alloc, cb_ev_alloc, acc_reg); + acc_prof_register (acc_ev_free, cb_ev_free, acc_reg); + +#if POINTERS + char *h = (char *) malloc (SIZE); +#else + char h[SIZE]; +#endif + + cb_ev_alloc_expected = true; +#if POINTERS +# pragma acc data create (h[0:SIZE]) +#else +# pragma acc data create (h) +#endif + { + void *d = acc_deviceptr (h); + assert (d); + assert (!cb_ev_alloc_expected); + assert (cb_ev_alloc_bytes == aligned_size (SIZE)); + assert (aligned_address (cb_ev_alloc_device_ptr) == d); + assert (acc_is_present (h, SIZE)); + +#if OPENACC_RUNTIME + acc_create (h, SIZE); +#else +# if POINTERS +# pragma acc enter data create (h[0:SIZE]) +# else +# pragma acc enter data create (h) +# endif +#endif + +#if POINTERS +# pragma acc data create (h[0:SIZE]) + ; +#else +# pragma acc data create (h) + ; +#endif + assert (acc_is_present (h, SIZE)); + +#if OPENACC_RUNTIME + acc_delete (h, SIZE); +#else +# if POINTERS +# pragma acc exit data delete (h[0:SIZE]) +# else +# pragma acc exit data delete (h) +# endif +#endif + assert (acc_is_present (h, SIZE)); + + cb_ev_free_expected = true; + } + assert (!cb_ev_free_expected); + assert (cb_ev_free_device_ptr == cb_ev_alloc_device_ptr); + assert (!acc_is_present (h, SIZE)); + +#if POINTERS + free (h); +#endif + + acc_prof_unregister (acc_ev_alloc, cb_ev_alloc, acc_reg); + acc_prof_unregister (acc_ev_free, cb_ev_free, acc_reg); +} + + +int +main () +{ + f1 (); + f2 (); + f3 (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/subset-subarray-mappings-1-r-p.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/subset-subarray-mappings-1-r-p.c index 9b5d83c66dd..907b8587773 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/subset-subarray-mappings-1-r-p.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/subset-subarray-mappings-1-r-p.c @@ -156,20 +156,16 @@ f1 (void) assert (acc_is_present (&myblock[i], SUBSET)); assert (acc_is_present (myblock, SIZE)); -#if 0 //TODO PR92848 if (last) cb_ev_free_expected = true; -#endif #if OPENACC_RUNTIME acc_delete (&myblock[i], SUBSET); #else # pragma acc exit data delete (myblock[i:SUBSET]) #endif -#if 0 //TODO PR92848 assert (!cb_ev_free_expected); if (last) assert (cb_ev_free_device_ptr == cb_ev_alloc_device_ptr); -#endif assert (acc_is_present (&myblock[i], SUBSET) != last); assert (acc_is_present (myblock, SIZE) != last); } @@ -331,9 +327,7 @@ f3 () assert (acc_is_present (h, SIZE)); assert (acc_is_present (&h[2], SIZE - 2)); -#if 0 //TODO PR92848 cb_ev_free_expected = true; -#endif #if OPENACC_RUNTIME acc_delete (h, SIZE); #else @@ -343,10 +337,8 @@ f3 () # pragma acc exit data delete (h) # endif #endif -#if 0 //TODO PR92848 assert (!cb_ev_free_expected); assert (cb_ev_free_device_ptr == cb_ev_alloc_device_ptr); -#endif assert (!acc_is_present (h, SIZE)); assert (!acc_is_present (&h[2], SIZE - 2)); @@ -401,19 +393,15 @@ f_lib_22 (void) memset (h, c1, SIZE); /* Now 'copyout' not the whole but only a "subset" subarray, missing one SUBSET at the beginning, and half a SUBSET at the end... */ -#if 0 //TODO PR92848 cb_ev_free_expected = true; -#endif #if OPENACC_RUNTIME acc_copyout (h + SUBSET, SIZE - SUBSET - SUBSET / 2); #else # pragma acc exit data copyout (h[SUBSET:SIZE - SUBSET - SUBSET / 2]) #endif -#if 0 //TODO PR92848 /* ..., yet, expect the device memory object to be 'free'd... */ assert (!cb_ev_free_expected); assert (cb_ev_free_device_ptr == cb_ev_alloc_device_ptr); -#endif /* ..., and the mapping to be removed... */ assert (!acc_is_present (h, SIZE)); assert (!acc_is_present (&h[SUBSET], SIZE - SUBSET - SUBSET / 2)); @@ -474,19 +462,15 @@ f_lib_30 (void) assert (aligned_address (cb_ev_alloc_device_ptr) == d); /* We 'delete' not the whole but only a "subset" subarray... */ -#if 0 //TODO PR92848 cb_ev_free_expected = true; -#endif #if OPENACC_RUNTIME acc_delete (h, SIZE - SUBSET); #else # pragma acc exit data delete (h[0:SIZE - SUBSET]) #endif -#if 0 //TODO PR92848 /* ..., yet, expect the device memory object to be 'free'd... */ assert (!cb_ev_free_expected); assert (cb_ev_free_device_ptr == cb_ev_alloc_device_ptr); -#endif /* ..., and the mapping to be removed. */ assert (!acc_is_present (h, SIZE)); assert (!acc_is_present (h, SIZE - SUBSET));