From 5d6823a25ec072c8f342a94f86ab159d4659c43d Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Wed, 4 Nov 2015 17:01:23 +0000 Subject: [PATCH] loop-red-g-1.c: New. * libgomp.oacc-c-c++-common/loop-red-g-1.c: New. * libgomp.oacc-c-c++-common/loop-red-gwv-1.c: New. * libgomp.oacc-c-c++-common/loop-red-v-1.c: New. * libgomp.oacc-c-c++-common/loop-red-v-2.c: New. * libgomp.oacc-c-c++-common/loop-red-w-1.c: New. * libgomp.oacc-c-c++-common/loop-red-w-2.c: New. * libgomp.oacc-c-c++-common/loop-red-wv-1.c: New. * libgomp.oacc-fortran/reduction-5.f90: Avoid reference var. From-SVN: r229770 --- libgomp/ChangeLog | 11 ++++ .../libgomp.oacc-c-c++-common/loop-red-g-1.c | 54 +++++++++++++++++ .../loop-red-gwv-1.c | 56 ++++++++++++++++++ .../libgomp.oacc-c-c++-common/loop-red-v-1.c | 56 ++++++++++++++++++ .../libgomp.oacc-c-c++-common/loop-red-v-2.c | 59 +++++++++++++++++++ .../libgomp.oacc-c-c++-common/loop-red-w-1.c | 54 +++++++++++++++++ .../libgomp.oacc-c-c++-common/loop-red-w-2.c | 57 ++++++++++++++++++ .../libgomp.oacc-c-c++-common/loop-red-wv-1.c | 54 +++++++++++++++++ .../libgomp.oacc-fortran/reduction-5.f90 | 11 ++-- 9 files changed, 408 insertions(+), 4 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 0effe1c0208..e0897e8f713 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,14 @@ +2015-11-04 Nathan Sidwell + + * libgomp.oacc-c-c++-common/loop-red-g-1.c: New. 
+ * libgomp.oacc-c-c++-common/loop-red-gwv-1.c: New. + * libgomp.oacc-c-c++-common/loop-red-v-1.c: New. + * libgomp.oacc-c-c++-common/loop-red-v-2.c: New. + * libgomp.oacc-c-c++-common/loop-red-w-1.c: New. + * libgomp.oacc-c-c++-common/loop-red-w-2.c: New. + * libgomp.oacc-c-c++-common/loop-red-wv-1.c: New. + * libgomp.oacc-fortran/reduction-5.f90: Avoid reference var. + 2015-11-03 Nathan Sidwell * libgomp.h (struct acc_dispatch_t): Remove args from exec_func. diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c new file mode 100644 index 00000000000..f315cdc7fc4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) +int main () +{ + int ix; + int ondev = 0; + int t = 0, h = 0; + +#pragma acc parallel num_gangs(32) vector_length(32) copy(t) copy(ondev) + { +#pragma acc loop gang reduction (+:t) + for (unsigned ix = 0; ix < N; ix++) + { + int val = ix; + + if (__builtin_acc_on_device (5)) /* 5 is presumably acc_device_nvidia -- TODO confirm. */ + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + val = (g << 16) | (w << 8) | v; + ondev = 1; + } + t += val; + } + } + + for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum. */ + { + int val = ix; + if(ondev) + { + int g = ix / ((N + 31) / 32); + int w = 0; + int v = 0; + + val = (g << 16) | (w << 8) | v; + } + h += val; + } + if (t != h) + { + printf ("t=%x expected %x\n", t, h); + return 1; + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c new file mode 100644 index 00000000000..1663e65e81a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c @@ -0,0 +1,56 @@ +/* { dg-do run } */ +/* { dg-additional-options
"-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) +int main () +{ + int ix; + int ondev = 0; + int t = 0, h = 0; + +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(t) copy(ondev) + { +#pragma acc loop gang worker vector reduction(+:t) + for (unsigned ix = 0; ix < N; ix++) + { + int val = ix; + + if (__builtin_acc_on_device (5)) /* 5 is presumably acc_device_nvidia -- TODO confirm. */ + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + val = (g << 16) | (w << 8) | v; + ondev = 1; + } + t += val; + } + } + + for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum. */ + { + int val = ix; + if(ondev) + { + int chunk_size = (N + 32*32*32 - 1) / (32*32*32); + + int g = ix / (chunk_size * 32 * 32); + int w = ix / 32 % 32; + int v = ix % 32; + + val = (g << 16) | (w << 8) | v; + } + h += val; + } + if (t != h) + { + printf ("t=%x expected %x\n", t, h); + return 1; + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c new file mode 100644 index 00000000000..be6916ab241 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c @@ -0,0 +1,56 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) + +int main () +{ + int ix; + int ondev = 0; + int t = 0, h = 0; + +#pragma acc parallel vector_length(32) copy(t) copy(ondev) + { +#pragma acc loop vector reduction (+:t) + for (unsigned ix = 0; ix < N; ix++) + { + int val = ix; + + if (__builtin_acc_on_device (5)) /* 5 is presumably acc_device_nvidia -- TODO confirm. */ + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + val = (g << 16) | (w << 8) | v; + ondev = 1; + } + t += val; + } + } + + for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum. */ + { + int val = ix; + if (ondev) + { + int g = 0; + int w = 0; + int v = ix %
32; + + val = (g << 16) | (w << 8) | v; + } + h += val; + } + + if (t != h) + { + printf ("t=%x expected %x\n", t, h); + return 1; + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c new file mode 100644 index 00000000000..e66732da32c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c @@ -0,0 +1,59 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) + +int main () +{ + int ix; + int ondev = 0; + int q = 0, h = 0; + +#pragma acc parallel vector_length(32) copy(q) copy(ondev) + { + int t = q; + +#pragma acc loop vector reduction (+:t) + for (unsigned ix = 0; ix < N; ix++) + { + int val = ix; + + if (__builtin_acc_on_device (5)) /* 5 is presumably acc_device_nvidia -- TODO confirm. */ + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + val = (g << 16) | (w << 8) | v; + ondev = 1; + } + t += val; + } + q = t; + } + + for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum. */ + { + int val = ix; + if (ondev) + { + int g = 0; + int w = 0; + int v = ix % 32; + + val = (g << 16) | (w << 8) | v; + } + h += val; + } + + if (q != h) + { + printf ("t=%x expected %x\n", q, h); + return 1; + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c new file mode 100644 index 00000000000..85fe6ebb482 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) +int main () +{ + int ix; + int ondev = 0; + int t = 0, h = 0; + +#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev) + { +#pragma acc loop worker reduction(+:t) + for (unsigned ix = 0; ix < N; ix++) + { + int val = ix; + + if
(__builtin_acc_on_device (5)) /* 5 is presumably acc_device_nvidia -- TODO confirm. */ + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + val = (g << 16) | (w << 8) | v; + ondev = 1; + } + t += val; + } + } + + for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum. */ + { + int val = ix; + if(ondev) + { + int g = 0; + int w = ix % 32; + int v = 0; + + val = (g << 16) | (w << 8) | v; + } + h += val; + } + if (t != h) + { + printf ("t=%x expected %x\n", t, h); + return 1; + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c new file mode 100644 index 00000000000..0059077b685 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) +int main () +{ + int ix; + int ondev = 0; + int q = 0, h = 0; + +#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) + { + int t = q; + +#pragma acc loop worker reduction(+:t) + for (unsigned ix = 0; ix < N; ix++) + { + int val = ix; + + if (__builtin_acc_on_device (5)) /* 5 is presumably acc_device_nvidia -- TODO confirm. */ + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + val = (g << 16) | (w << 8) | v; + ondev = 1; + } + t += val; + } + q = t; + } + + for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum. */ + { + int val = ix; + if(ondev) + { + int g = 0; + int w = ix % 32; + int v = 0; + + val = (g << 16) | (w << 8) | v; + } + h += val; + } + if (q != h) + { + printf ("t=%x expected %x\n", q, h); + return 1; + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c new file mode 100644 index 00000000000..3a5be5ce0d3 --- /dev/null +++
b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-additional-options "-O2" } */ + +#include <stdio.h> + +#define N (32*32*32+17) +int main () +{ + int ix; + int ondev = 0; + int t = 0, h = 0; + +#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev) + { +#pragma acc loop worker vector reduction (+:t) + for (unsigned ix = 0; ix < N; ix++) + { + int val = ix; + + if (__builtin_acc_on_device (5)) /* 5 is presumably acc_device_nvidia -- TODO confirm. */ + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + val = (g << 16) | (w << 8) | v; + ondev = 1; + } + t += val; + } + } + + for (ix = 0; ix < N; ix++) /* Host-side recomputation of the expected sum. */ + { + int val = ix; + if(ondev) + { + int g = 0; + int w = (ix / 32) % 32; + int v = ix % 32; + + val = (g << 16) | (w << 8) | v; + } + h += val; + } + if (t != h) + { + printf ("t=%x expected %x\n", t, h); + return 1; + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 index df44a7ae69d..ec13e4e6c07 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 @@ -21,12 +21,15 @@ end program reduction subroutine redsub(sum, n, c) integer :: sum, n, c - sum = 0 + integer :: s + s = 0 - !$acc parallel vector_length(n) copyin (n, c) num_gangs(1) - !$acc loop reduction(+:sum) + !$acc parallel vector_length(32) copyin (n, c) copy (s) num_gangs(1) + !$acc loop reduction(+:s) do i = 1, n - sum = sum + c + s = s + c end do !$acc end parallel + + sum = s end subroutine redsub -- 2.30.2