From 7de562eec26f729b1b29e9817e80fa4cebb06774 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 14 Dec 2018 21:42:29 +0100 Subject: [PATCH] Revise libgomp.oacc-c-c++-common/data-2-lib.c, libgomp.oacc-c-c++-common/data-2.c These are meant to be functionally equivalent (but no longer are), just using different means. Also, use the OpenACC "*_async" functions recently added. libgomp/ * testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Revise. * testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise. From-SVN: r267149 --- libgomp/ChangeLog | 5 + .../libgomp.oacc-c-c++-common/data-2-lib.c | 129 +++++++-------- .../libgomp.oacc-c-c++-common/data-2.c | 148 ++++++++---------- 3 files changed, 125 insertions(+), 157 deletions(-) diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index b6cbb34908a..d84c3f4bfe2 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,8 @@ +2018-12-14 Thomas Schwinge + + * testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Revise. + * testsuite/libgomp.oacc-c-c++-common/data-2.c: Likewise. + 2018-12-14 Chung-Lin Tang * testsuite/libgomp.oacc-c-c++-common/data-2-lib.c: Adjust. diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c index f553d3d839c..e432f8d9c79 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2-lib.c @@ -1,16 +1,15 @@ -/* This test is similar to data-2.c, but it uses acc_* library functions - to move data. */ - -/* { dg-do run } */ +/* Test asynchronous, unstructured data regions, runtime library variant. */ +/* See also data-2.c. 
*/ #include +#undef NDEBUG #include #include int main (int argc, char **argv) { - int N = 128; //1024 * 1024; + int N = 12345; float *a, *b, *c, *d, *e; void *d_a, *d_b, *d_c, *d_d; int i; @@ -30,19 +29,21 @@ main (int argc, char **argv) b[i] = 0.0; } - d_a = acc_copyin (a, nbytes); - d_b = acc_copyin (b, nbytes); - acc_copyin (&N, sizeof (int)); + acc_copyin_async (a, nbytes, acc_async_noval); + acc_copyin_async (b, nbytes, acc_async_noval); + acc_copyin_async (&N, sizeof (int), acc_async_noval); -#pragma acc parallel present (a[0:N], b[0:N], N) async wait +#pragma acc parallel present (a[0:N], b[0:N], N) async #pragma acc loop for (i = 0; i < N; i++) b[i] = a[i]; - acc_wait_all (); + d_a = acc_deviceptr (a); + acc_memcpy_from_device_async (a, d_a, nbytes, acc_async_noval); + d_b = acc_deviceptr (b); + acc_memcpy_from_device_async (b, d_b, nbytes, acc_async_noval); - acc_memcpy_from_device (a, d_a, nbytes); - acc_memcpy_from_device (b, d_b, nbytes); + acc_wait (acc_async_noval); for (i = 0; i < N; i++) { @@ -56,19 +57,19 @@ main (int argc, char **argv) b[i] = 0.0; } - acc_update_device (a, nbytes); - acc_update_device (b, nbytes); + acc_update_device_async (a, nbytes, 1); + acc_update_device_async (b, nbytes, 1); -#pragma acc parallel present (a[0:N], b[0:N], N) async (1) +#pragma acc parallel present (a[0:N], b[0:N], N) async (1) #pragma acc loop for (i = 0; i < N; i++) b[i] = a[i]; + acc_memcpy_from_device_async (a, d_a, nbytes, 1); + acc_memcpy_from_device_async (b, d_b, nbytes, 1); + acc_wait (1); - acc_memcpy_from_device (a, d_a, nbytes); - acc_memcpy_from_device (b, d_b, nbytes); - for (i = 0; i < N; i++) { assert (a[i] == 2.0); @@ -83,46 +84,42 @@ main (int argc, char **argv) d[i] = 0.0; } - acc_update_device (a, nbytes); - acc_update_device (b, nbytes); - d_c = acc_copyin (c, nbytes); - d_d = acc_copyin (d, nbytes); + acc_update_device_async (a, nbytes, 0); + acc_update_device_async (b, nbytes, 1); + acc_copyin_async (c, nbytes, 2); + acc_copyin_async (d, 
nbytes, 3); -#pragma acc parallel present (a[0:N], b[0:N], N) async (1) +#pragma acc parallel present (a[0:N], b[0:N], N) wait (0) async (1) #pragma acc loop for (i = 0; i < N; i++) b[i] = (a[i] * a[i] * a[i]) / a[i]; -#pragma acc parallel present (a[0:N], c[0:N], N) async (2) +#pragma acc parallel present (a[0:N], c[0:N], N) wait (0) async (2) #pragma acc loop for (i = 0; i < N; i++) c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i]; -#pragma acc parallel present (a[0:N], d[0:N], N) async (3) +#pragma acc parallel present (a[0:N], d[0:N], N) wait (0) async (3) #pragma acc loop for (i = 0; i < N; i++) d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i]; - acc_wait_all (); + acc_memcpy_from_device_async (a, d_a, nbytes, 0); + acc_memcpy_from_device_async (b, d_b, nbytes, 1); + d_c = acc_deviceptr (c); + acc_memcpy_from_device_async (c, d_c, nbytes, 2); + d_d = acc_deviceptr (d); + acc_memcpy_from_device_async (d, d_d, nbytes, 3); - acc_memcpy_from_device (a, d_a, nbytes); - acc_memcpy_from_device (b, d_b, nbytes); - acc_memcpy_from_device (c, d_c, nbytes); - acc_memcpy_from_device (d, d_d, nbytes); + acc_wait_all_async (0); + acc_wait (0); for (i = 0; i < N; i++) { - if (a[i] != 3.0) - abort (); - - if (b[i] != 9.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); + assert (a[i] == 3.0); + assert (b[i] == 9.0); + assert (c[i] == 4.0); + assert (d[i] == 1.0); } for (i = 0; i < N; i++) @@ -134,53 +131,43 @@ main (int argc, char **argv) e[i] = 0.0; } - acc_update_device (a, nbytes); - acc_update_device (b, nbytes); - acc_update_device (c, nbytes); - acc_update_device (d, nbytes); - acc_copyin (e, nbytes); + acc_update_device_async (a, nbytes, 10); + acc_update_device_async (b, nbytes, 11); + acc_update_device_async (c, nbytes, 12); + acc_update_device_async (d, nbytes, 13); + acc_copyin_async (e, nbytes, 14); -#pragma acc parallel present (a[0:N], b[0:N], N) async (1) +#pragma acc parallel present (a[0:N], b[0:N], N) wait (10) async (11) for (int ii = 0; 
ii < N; ii++) b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; -#pragma acc parallel present (a[0:N], c[0:N], N) async (2) +#pragma acc parallel present (a[0:N], c[0:N], N) wait (10) async (12) for (int ii = 0; ii < N; ii++) c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; -#pragma acc parallel present (a[0:N], d[0:N], N) async (3) +#pragma acc parallel present (a[0:N], d[0:N], N) wait (10) async (13) for (int ii = 0; ii < N; ii++) d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; -#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N], N) \ - wait (1, 2, 3) async (4) +#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N], N) wait (11) wait (12) wait (13) async (14) for (int ii = 0; ii < N; ii++) e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; + acc_copyout_async (a, nbytes, 10); + acc_copyout_async (b, nbytes, 11); + acc_copyout_async (c, nbytes, 12); + acc_copyout_async (d, nbytes, 13); + acc_copyout_async (e, nbytes, 14); + acc_delete_async (&N, sizeof (int), 15); acc_wait_all (); - acc_copyout (a, nbytes); - acc_copyout (b, nbytes); - acc_copyout (c, nbytes); - acc_copyout (d, nbytes); - acc_copyout (e, nbytes); - acc_delete (&N, sizeof (int)); for (i = 0; i < N; i++) { - if (a[i] != 2.0) - abort (); - - if (b[i] != 4.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); - - if (e[i] != 11.0) - abort (); + assert (a[i] == 2.0); + assert (b[i] == 4.0); + assert (c[i] == 4.0); + assert (d[i] == 1.0); + assert (e[i] == 11.0); } return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c index 81d623afa0e..c0f36d3be6b 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-2.c @@ -1,14 +1,14 @@ -/* Test 'acc enter/exit data' regions. */ - -/* { dg-do run } */ -/* { dg-xfail-run-if "TODO" { openacc_nvidia_accel_selected } } */ +/* Test asynchronous, unstructured data regions, directives variant. 
*/ +/* See also data-2-lib.c. */ #include +#undef NDEBUG +#include int main (int argc, char **argv) { - int N = 128; //1024 * 1024; + int N = 12345; float *a, *b, *c, *d, *e; int i; int nbytes; @@ -27,48 +27,24 @@ main (int argc, char **argv) b[i] = 0.0; } -#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async -#pragma acc parallel present (a[0:N], b[0:N]) async wait -#pragma acc loop - for (i = 0; i < N; i++) - b[i] = a[i]; - -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) wait async -#pragma acc wait - - for (i = 0; i < N; i++) - { - if (a[i] != 3.0) - abort (); - - if (b[i] != 3.0) - abort (); - } - - for (i = 0; i < N; i++) - { - a[i] = 3.0; - b[i] = 0.0; - } +#pragma acc enter data copyin (a[0:N]) async +#pragma acc enter data copyin (b[0:N]) async +#pragma acc enter data copyin (N) async -#pragma acc enter data copyin (a[0:N]) async -#pragma acc enter data copyin (b[0:N]) async wait -#pragma acc enter data copyin (N) async wait -#pragma acc parallel async wait +#pragma acc parallel present (a[0:N], b[0:N], N) async #pragma acc loop for (i = 0; i < N; i++) b[i] = a[i]; -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) delete (N) wait async +#pragma acc update self (a[0:N]) async +#pragma acc update self (b[0:N]) async + #pragma acc wait for (i = 0; i < N; i++) { - if (a[i] != 3.0) - abort (); - - if (b[i] != 3.0) - abort (); + assert (a[i] == 3.0); + assert (b[i] == 3.0); } for (i = 0; i < N; i++) @@ -77,22 +53,23 @@ main (int argc, char **argv) b[i] = 0.0; } -#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async (1) -#pragma acc parallel present (a[0:N], b[0:N]) async (1) +#pragma acc update device (a[0:N]) async (1) +#pragma acc update device (b[0:N]) async (1) + +#pragma acc parallel present (a[0:N], b[0:N], N) async (1) #pragma acc loop for (i = 0; i < N; i++) b[i] = a[i]; -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) wait (1) async (1) +#pragma acc update self (a[0:N]) async (1) +#pragma acc 
update self (b[0:N]) async (1) + #pragma acc wait (1) for (i = 0; i < N; i++) { - if (a[i] != 2.0) - abort (); - - if (b[i] != 2.0) - abort (); + assert (a[i] == 2.0); + assert (b[i] == 2.0); } for (i = 0; i < N; i++) @@ -103,39 +80,40 @@ main (int argc, char **argv) d[i] = 0.0; } -#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (c[0:N]) copyin (d[0:N]) copyin (N) async (1) +#pragma acc update device (a[0:N]) async (0) +#pragma acc update device (b[0:N]) async (1) +#pragma acc enter data copyin (c[0:N]) async (2) +#pragma acc enter data copyin (d[0:N]) async (3) -#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1) +#pragma acc parallel present (a[0:N], b[0:N], N) wait (0) async (1) #pragma acc loop for (i = 0; i < N; i++) b[i] = (a[i] * a[i] * a[i]) / a[i]; -#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1) +#pragma acc parallel present (a[0:N], c[0:N], N) wait (0) async (2) #pragma acc loop for (i = 0; i < N; i++) c[i] = (a[i] + a[i] + a[i] + a[i]) / a[i]; -#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1) +#pragma acc parallel present (a[0:N], d[0:N], N) wait (0) async (3) #pragma acc loop for (i = 0; i < N; i++) d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i]; -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) copyout (d[0:N]) wait (1, 2, 3) async (1) -#pragma acc wait (1) +#pragma acc update self (a[0:N]) async (0) +#pragma acc update self (b[0:N]) async (1) +#pragma acc update self (c[0:N]) async (2) +#pragma acc update self (d[0:N]) async (3) + +#pragma acc wait async (0) +#pragma acc wait (0) for (i = 0; i < N; i++) { - if (a[i] != 3.0) - abort (); - - if (b[i] != 9.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); + assert (a[i] == 3.0); + assert (b[i] == 9.0); + assert (c[i] == 4.0); + assert (d[i] == 1.0); } for (i = 0; i < N; i++) @@ -147,45 +125,43 @@ main (int argc, char **argv) e[i] = 0.0; } -#pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) 
copyin (c[0:N]) copyin (d[0:N]) copyin (e[0:N]) copyin (N) async (1) +#pragma acc update device (a[0:N]) async (10) +#pragma acc update device (b[0:N]) async (11) +#pragma acc update device (c[0:N]) async (12) +#pragma acc update device (d[0:N]) async (13) +#pragma acc enter data copyin (e[0:N]) async (14) -#pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1) +#pragma acc parallel present (a[0:N], b[0:N], N) wait (10) async (11) for (int ii = 0; ii < N; ii++) b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii]; -#pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1) +#pragma acc parallel present (a[0:N], c[0:N], N) wait (10) async (12) for (int ii = 0; ii < N; ii++) c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii]; -#pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1) +#pragma acc parallel present (a[0:N], d[0:N], N) wait (10) async (13) for (int ii = 0; ii < N; ii++) d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii]; -#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) \ - wait (1, 2, 3) async (4) +#pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N], N) wait (11) wait (12) wait (13) async (14) for (int ii = 0; ii < N; ii++) e[ii] = a[ii] + b[ii] + c[ii] + d[ii]; -#pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) \ - copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1) -#pragma acc wait (1) +#pragma acc exit data copyout (a[0:N]) async (10) +#pragma acc exit data copyout (b[0:N]) async (11) +#pragma acc exit data copyout (c[0:N]) async (12) +#pragma acc exit data copyout (d[0:N]) async (13) +#pragma acc exit data copyout (e[0:N]) async (14) +#pragma acc exit data delete (N) async (15) +#pragma acc wait for (i = 0; i < N; i++) { - if (a[i] != 2.0) - abort (); - - if (b[i] != 4.0) - abort (); - - if (c[i] != 4.0) - abort (); - - if (d[i] != 1.0) - abort (); - - if (e[i] != 11.0) - abort (); + assert (a[i] == 2.0); + assert (b[i] == 4.0); + assert (c[i] == 4.0); + assert (d[i] == 
1.0); + assert (e[i] == 11.0); } return 0; -- 2.30.2