From f847198ec32588beda29a03572a765f9f91b0644 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 14 Dec 2018 21:43:02 +0100 Subject: [PATCH] [PR88495] An OpenACC async queue is always synchronized with itself An OpenACC async queue is always synchronized with itself, so invocations like "#pragma acc wait(0) async(0)", or "acc_wait_async (0, 0)" don't make a lot of sense, but are still valid. libgomp/ PR libgomp/88495 * plugin/plugin-nvptx.c (nvptx_wait_async): Don't refuse "identical parameters". * testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c: Update. * testsuite/libgomp.oacc-c-c++-common/lib-80.c: Remove. From-SVN: r267152 --- libgomp/ChangeLog | 6 + libgomp/plugin/plugin-nvptx.c | 3 +- .../asyncwait-nop-1.c | 3 - .../libgomp.oacc-c-c++-common/lib-80.c | 135 ------------------ 4 files changed, 8 insertions(+), 139 deletions(-) delete mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 2914066f753..349497d58ee 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,5 +1,11 @@ 2018-12-14 Thomas Schwinge + PR libgomp/88495 + * plugin/plugin-nvptx.c (nvptx_wait_async): Don't refuse + "identical parameters". + * testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c: Update. + * testsuite/libgomp.oacc-c-c++-common/lib-80.c: Remove. + PR libgomp/88484 * oacc-parallel.c (GOACC_wait): Correct handling for "async >= 0". * testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c: New file. diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 6f9b16634b1..fb686de73f2 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1617,8 +1617,9 @@ nvptx_wait_async (int async1, int async2) necessarily have to exist already. */ s2 = select_stream_for_async (async2, self, true, NULL); + /* A stream is always synchronized with itself. */ if (s1 == s2) - GOMP_PLUGIN_fatal ("identical parameters"); + return; e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent)); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c index e4f627d38bc..4ab67363ba6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-nop-1.c @@ -51,9 +51,6 @@ main () { for (size_t j = 0; j < values_n; ++j) { - if (values[i] == values[j]) - continue; - #pragma acc parallel wait (values[i]) async (values[j]) ; #pragma acc wait (values[i]) async (values[j]) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c deleted file mode 100644 index 9a9a837fa4f..00000000000 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-80.c +++ /dev/null @@ -1,135 +0,0 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ -/* { dg-additional-options "-lcuda" } */ - -#include -#include -#include -#include -#include -#include "timer.h" - -int -main (int argc, char **argv) -{ - CUdevice dev; - CUfunction delay; - CUmodule module; - CUresult r; - CUstream stream; - int N; - int i; - unsigned long *a, *d_a, dticks; - int nbytes; - float atime, dtime; - void *kargs[2]; - int clkrate; - int devnum, nprocs; - - acc_init (acc_device_nvidia); - - devnum = acc_get_device_num (acc_device_nvidia); - - r = cuDeviceGet (&dev, devnum); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGet failed: %d\n", r); - abort (); - } - - r = - cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, - dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r); - abort (); - } - - r = cuModuleLoad (&module, "subr.ptx"); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuModuleLoad failed: %d\n", r); - abort (); - } - - r = cuModuleGetFunction (&delay, module, "delay"); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuModuleGetFunction failed: %d\n", r); - abort (); - } - - nbytes = nprocs * sizeof (unsigned long); - - dtime = 200.0; - - dticks = (unsigned long) (dtime * clkrate); - - N = nprocs; - - a = (unsigned long *) malloc (nbytes); - d_a = (unsigned long *) acc_malloc (nbytes); - - acc_map_data (a, d_a, nbytes); - - r = cuStreamCreate (&stream, CU_STREAM_DEFAULT); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuStreamCreate failed: %d\n", r); - abort (); - } - - acc_set_cuda_stream (1, stream); - - init_timers (1); - - kargs[0] = (void *) &d_a; - kargs[1] = (void *) &dticks; - - start_timer (0); - - for (i = 0; i < N; i++) - { - r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0); - if (r != CUDA_SUCCESS) - { - fprintf (stderr, "cuLaunchKernel failed: %d\n", r); - abort (); - } - } - - fprintf (stderr, "CheCKpOInT\n"); - acc_wait_async (1, 1); - - acc_wait (1); - - atime = stop_timer (0); - - if (atime < dtime) - { - fprintf (stderr, "actual time < delay time\n"); - abort (); - } - - acc_unmap_data (a); - - fini_timers (); - - free (a); - acc_free (d_a); - - acc_shutdown (acc_device_nvidia); - - return 0; -} - -/* { dg-output "CheCKpOInT(\n|\r\n|\r).*" } */ -/* { dg-output "identical parameters" } */ -/* { dg-shouldfail "" } */ -- 2.30.2