From 4fef8e4d8c8901db0fa21c4d49b7a851bff4ac9a Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Wed, 23 Jan 2019 08:16:42 +0000 Subject: [PATCH] [nvptx, libgomp] Fix assert (!s->map->active) in map_fini There are currently two situations where this assert triggers: ... libgomp/plugin/plugin-nvptx.c: map_fini: Assertion `!s->map->active' failed. ... First, in abort-1.c, a parallel region triggering an abort: ... int main (void) { #pragma acc parallel abort (); return 0; } ... The abort is detected in nvptx_exec as the CUDA_ERROR_ILLEGAL_INSTRUCTION return status of the cuStreamSynchronize call after kernel launch, which is then handled by calling non-returning function GOMP_PLUGIN_fatal. Consequently, the map_pop in nvptx_exec that in case of cuStreamSynchronize success would remove or inactive the element added by the map_push earlier in nvptx_exec, does not trigger. With the element no longer active, but still marked active and a member of s->map, we run into the assert during GOMP_OFFLOAD_fini_device, which is triggered from atexit handler gomp_target_fini (which is triggered by the GOMP_PLUGIN_fatal mentioned above calling exit). Second, in pr88941.c, an async parallel region without wait: ... int main (void) { #pragma acc parallel async ; /* no #pragma acc wait */ return 0; } ... Because nvptx_exec is handling an async region, it does not call map_pop for the element added by map_push, but schedules an kernel execution completion event to call map_pop. Again, we run into the assert during GOMP_OFFLOAD_fini_device, which is triggered from atexit handler gomp_target_fini, but the exit in this case is triggered by returning from main. So either the kernel is still running, or the kernel has completed but the corresponding event that is supposed to call map_pop is stuck in the event queue, waiting for an event_gc. Fix this by removing the assert, and skipping the freeing of device memory if the map is still marked active (though in the async case, this is more a workaround than an fix). 2019-01-23 Tom de Vries PR target/88941 PR target/88939 * plugin/plugin-nvptx.c (cuda_map_destroy): Handle map->active case. (map_fini): Remove "assert (!s->map->active)". * testsuite/libgomp.oacc-c-c++-common/pr88941.c: New test. From-SVN: r268177 --- libgomp/ChangeLog | 8 ++++++ libgomp/plugin/plugin-nvptx.c | 27 +++++++++++++++++-- .../libgomp.oacc-c-c++-common/pr88941.c | 15 +++++++++++ 3 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr88941.c diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 5d13262c398..30fb11d0290 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,11 @@ +2019-01-23 Tom de Vries + + PR target/88941 + PR target/88939 + * plugin/plugin-nvptx.c (cuda_map_destroy): Handle map->active case. + (map_fini): Remove "assert (!s->map->active)". + * testsuite/libgomp.oacc-c-c++-common/pr88941.c: New test. + 2019-01-23 Tom de Vries PR target/87835 diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index a220560b189..4a67191932e 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -237,7 +237,31 @@ cuda_map_create (size_t size) static void cuda_map_destroy (struct cuda_map *map) { - CUDA_CALL_ASSERT (cuMemFree, map->d); + if (map->active) + /* Possible reasons for the map to be still active: + - the associated async kernel might still be running. + - the associated async kernel might have finished, but the + corresponding event that should trigger the pop_map has not been + processed by event_gc. + - the associated sync kernel might have aborted + + The async cases could happen if the user specified an async region + without adding a corresponding wait that is guaranteed to be executed + (before returning from main, or in an atexit handler). + We do not want to deallocate a device pointer that is still being + used, so skip it. + + In the sync case, the device pointer is no longer used, but deallocating + it using cuMemFree will not succeed, so skip it. + + TODO: Handle this in a more constructive way, by f.i. waiting for streams + to finish before de-allocating them (PR88981), or by ensuring the CUDA + lib atexit handler is called before rather than after the libgomp plugin + atexit handler (PR83795). */ + ; + else + CUDA_CALL_ASSERT (cuMemFree, map->d); + free (map); } @@ -268,7 +292,6 @@ static bool map_fini (struct ptx_stream *s) { assert (s->map->next == NULL); - assert (!s->map->active); cuda_map_destroy (s->map); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr88941.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr88941.c new file mode 100644 index 00000000000..e31bb527df3 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr88941.c @@ -0,0 +1,15 @@ +/* { dg-do run } */ + +#include + +int +main (void) +{ + +#pragma acc parallel async + ; + + /* no #pragma acc wait */ + return 0; +} + -- 2.30.2