From 2c71d454eb6c35af8bf64d4c15621bb380f494ab Mon Sep 17 00:00:00 2001 From: Chung-Lin Tang Date: Thu, 18 Aug 2016 14:46:19 +0000 Subject: [PATCH] re PR middle-end/70895 (OpenACC: loop reduction does not work. Output is zero.) 2016-08-18 Chung-Lin Tang PR middle-end/70895 gcc/ * gimplify.c (omp_add_variable): Adjust/add variable mapping on enclosing parallel construct for reduction variables on OpenACC loop directives. gcc/testsuite/ * gfortran.dg/goacc/loop-tree-1.f90: Add gimple scan-tree-dump test. * c-c++-common/goacc/reduction-1.c: Likewise. * c-c++-common/goacc/reduction-2.c: Likewise. * c-c++-common/goacc/reduction-3.c: Likewise. * c-c++-common/goacc/reduction-4.c: Likewise. libgomp/ * testsuite/libgomp.oacc-fortran/reduction-7.f90: Add explicit firstprivate clauses. * testsuite/libgomp.oacc-fortran/reduction-6.f90: Remove explicit copy clauses. * testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-flt.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/collapse-4.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c: Likewise. 
From-SVN: r239576 --- gcc/ChangeLog | 7 ++++ gcc/gimplify.c | 39 +++++++++++++++++++ gcc/testsuite/ChangeLog | 9 +++++ .../c-c++-common/goacc/reduction-1.c | 5 +++ .../c-c++-common/goacc/reduction-2.c | 5 +++ .../c-c++-common/goacc/reduction-3.c | 5 +++ .../c-c++-common/goacc/reduction-4.c | 5 +++ .../gfortran.dg/goacc/loop-tree-1.f90 | 3 +- libgomp/ChangeLog | 20 ++++++++++ .../libgomp.oacc-c-c++-common/collapse-2.c | 2 +- .../libgomp.oacc-c-c++-common/collapse-4.c | 2 +- .../libgomp.oacc-c-c++-common/loop-red-g-1.c | 2 +- .../loop-red-gwv-1.c | 2 +- .../libgomp.oacc-c-c++-common/loop-red-v-1.c | 2 +- .../libgomp.oacc-c-c++-common/loop-red-w-1.c | 2 +- .../libgomp.oacc-c-c++-common/loop-red-wv-1.c | 2 +- .../libgomp.oacc-c-c++-common/reduction-7.c | 23 ++++------- .../reduction-cplx-dbl.c | 6 +-- .../reduction-cplx-flt.c | 6 +-- .../libgomp.oacc-c-c++-common/reduction-dbl.c | 6 +-- .../libgomp.oacc-c-c++-common/reduction-flt.c | 6 +-- .../libgomp.oacc-fortran/reduction-6.f90 | 10 ++--- .../libgomp.oacc-fortran/reduction-7.f90 | 6 +-- 23 files changed, 132 insertions(+), 43 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 505e66eae01..da653161a66 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2016-08-18 Chung-Lin Tang + + PR middle-end/70895 + * gimplify.c (omp_add_variable): Adjust/add variable mapping on + enclosing parallel construct for reduction variables on OpenACC loop + directives. + 2016-08-18 Pierre-Marie de Rodat * dwarf2out.c (copy_dwarf_procedure): Remove obsolete comment. 
diff --git a/gcc/gimplify.c b/gcc/gimplify.c index 1e43dbb51cd..4715332eddf 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -6010,6 +6010,45 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree decl, unsigned int flags) n->value |= flags; else splay_tree_insert (ctx->variables, (splay_tree_key)decl, flags); + + /* For reduction clauses in OpenACC loop directives, by default create a + copy clause on the enclosing parallel construct for carrying back the + results. */ + if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION)) + { + struct gimplify_omp_ctx *outer_ctx = ctx->outer_context; + while (outer_ctx) + { + n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl); + if (n != NULL) + { + /* Ignore local variables and explicitly declared clauses. */ + if (n->value & (GOVD_LOCAL | GOVD_EXPLICIT)) + break; + else if (outer_ctx->region_type == ORT_ACC_KERNELS) + { + /* According to the OpenACC spec, such a reduction variable + should already have a copy map on a kernels construct, + verify that here. */ + gcc_assert (!(n->value & GOVD_FIRSTPRIVATE) + && (n->value & GOVD_MAP)); + } + else if (outer_ctx->region_type == ORT_ACC_PARALLEL) + { + /* Remove firstprivate and make it a copy map. */ + n->value &= ~GOVD_FIRSTPRIVATE; + n->value |= GOVD_MAP; + } + } + else if (outer_ctx->region_type == ORT_ACC_PARALLEL) + { + splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl, + GOVD_MAP | GOVD_SEEN); + break; + } + outer_ctx = outer_ctx->outer_context; + } + } } /* Notice a threadprivate variable DECL used in OMP context CTX. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4c7e6091a49..76799287e46 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2016-08-18 Chung-Lin Tang + + PR middle-end/70895 + * gfortran.dg/goacc/loop-tree-1.f90: Add gimple scan-tree-dump test. + * c-c++-common/goacc/reduction-1.c: Likewise. + * c-c++-common/goacc/reduction-2.c: Likewise. 
+ * c-c++-common/goacc/reduction-3.c: Likewise. + * c-c++-common/goacc/reduction-4.c: Likewise. + 2016-08-18 Alan Modra * gcc.c-torture/compile/pr72771.c: New. diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-1.c b/gcc/testsuite/c-c++-common/goacc/reduction-1.c index 3c1c2dda6cf..35bfc868708 100644 --- a/gcc/testsuite/c-c++-common/goacc/reduction-1.c +++ b/gcc/testsuite/c-c++-common/goacc/reduction-1.c @@ -1,3 +1,4 @@ +/* { dg-additional-options "-fdump-tree-gimple" } */ /* Integer reductions. */ #define n 1000 @@ -65,3 +66,7 @@ main(void) return 0; } + +/* Check that default copy maps are generated for loop reductions. */ +/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 7 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-2.c b/gcc/testsuite/c-c++-common/goacc/reduction-2.c index c3105a2073c..9dba035adb6 100644 --- a/gcc/testsuite/c-c++-common/goacc/reduction-2.c +++ b/gcc/testsuite/c-c++-common/goacc/reduction-2.c @@ -1,3 +1,4 @@ +/* { dg-additional-options "-fdump-tree-gimple" } */ /* float reductions. */ #define n 1000 @@ -47,3 +48,7 @@ main(void) return 0; } + +/* Check that default copy maps are generated for loop reductions. */ +/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 4 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-3.c b/gcc/testsuite/c-c++-common/goacc/reduction-3.c index 4dbde04bc3b..669cd438113 100644 --- a/gcc/testsuite/c-c++-common/goacc/reduction-3.c +++ b/gcc/testsuite/c-c++-common/goacc/reduction-3.c @@ -1,3 +1,4 @@ +/* { dg-additional-options "-fdump-tree-gimple" } */ /* double reductions. 
*/ #define n 1000 @@ -47,3 +48,7 @@ main(void) return 0; } + +/* Check that default copy maps are generated for loop reductions. */ +/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 4 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/reduction-4.c b/gcc/testsuite/c-c++-common/goacc/reduction-4.c index c4572b97bb1..5c3dfb19172 100644 --- a/gcc/testsuite/c-c++-common/goacc/reduction-4.c +++ b/gcc/testsuite/c-c++-common/goacc/reduction-4.c @@ -1,3 +1,4 @@ +/* { dg-additional-options "-fdump-tree-gimple" } */ /* complex reductions. */ #define n 1000 @@ -35,3 +36,7 @@ main(void) return 0; } + +/* Check that default copy maps are generated for loop reductions. */ +/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */ diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-tree-1.f90 b/gcc/testsuite/gfortran.dg/goacc/loop-tree-1.f90 index 81bdc23e46c..150f9304e46 100644 --- a/gcc/testsuite/gfortran.dg/goacc/loop-tree-1.f90 +++ b/gcc/testsuite/gfortran.dg/goacc/loop-tree-1.f90 @@ -1,4 +1,4 @@ -! { dg-additional-options "-fdump-tree-original -std=f2008" } +! { dg-additional-options "-fdump-tree-original -fdump-tree-gimple -std=f2008" } ! test for tree-dump-original and spaces-commas @@ -44,3 +44,4 @@ end program test ! { dg-final { scan-tree-dump-times "private\\(m\\)" 1 "original" } } ! { dg-final { scan-tree-dump-times "reduction\\(\\+:sum\\)" 1 "original" } } +! 
{ dg-final { scan-tree-dump-times "map\\(tofrom:sum \\\[len: \[0-9\]+\\\]\\)" 1 "gimple" } } diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index e15bbc0c4ee..5849aca7e50 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,23 @@ +2016-08-18 Chung-Lin Tang + + PR middle-end/70895 + * testsuite/libgomp.oacc-fortran/reduction-7.f90: Add explicit + firstprivate clauses. + * testsuite/libgomp.oacc-fortran/reduction-6.f90: Remove explicit + copy clauses. + * testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-flt.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/collapse-4.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise. + * testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c: Likewise. 
+ 2016-08-14 Chung-Lin Tang PR fortran/70598 diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c index 62bb5e0bbd9..1ea0a6b846d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-2.c @@ -8,7 +8,7 @@ main (void) int i, j, k, l = 0, f = 0, x = 0; int m1 = 4, m2 = -5, m3 = 17; -#pragma acc parallel copy(l) +#pragma acc parallel #pragma acc loop seq collapse(3) reduction(+:l) for (i = -2; i < m1; i++) for (j = m2; j < -2; j++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c index 52dd4353246..ea652b69e2a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/collapse-4.c @@ -11,7 +11,7 @@ main (void) memset (b, '\0', sizeof (b)); -#pragma acc parallel copy(b[0:3][0:3]) copy(l) +#pragma acc parallel copy(b[0:3][0:3]) { #pragma acc loop collapse(2) reduction(+:l) for (i = 0; i < 2; i++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c index a8684f9587e..d241d413625 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c @@ -11,7 +11,7 @@ int main () int ondev = 0; int t = 0, h = 0; -#pragma acc parallel num_gangs(32) vector_length(32) copy(t) copy(ondev) +#pragma acc parallel num_gangs(32) vector_length(32) copy(ondev) { #pragma acc loop gang reduction (+:t) for (unsigned ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c index 3b104cf2f47..4ae4b7c1246 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c @@ -11,7 +11,7 @@ 
int main () int ondev = 0; int t = 0, h = 0; -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(t) copy(ondev) +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev) { #pragma acc loop gang worker vector reduction(+:t) for (unsigned ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c index b77ae76e321..0556455d62f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c @@ -12,7 +12,7 @@ int main () int ondev = 0; int t = 0, h = 0; -#pragma acc parallel vector_length(32) copy(t) copy(ondev) +#pragma acc parallel vector_length(32) copy(ondev) { #pragma acc loop vector reduction (+:t) for (unsigned ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c index 9cc12b3add8..19021d9d062 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c @@ -11,7 +11,7 @@ int main () int ondev = 0; int t = 0, h = 0; -#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) { #pragma acc loop worker reduction(+:t) for (unsigned ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c index 398b7cc97e5..0fec2dcfd9c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c @@ -10,7 +10,7 @@ int main () int ondev = 0; int t = 0, h = 0; -#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) { #pragma acc loop worker vector reduction 
(+:t) for (unsigned ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c index 76c33e4470d..cc3cd07301a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c @@ -13,8 +13,7 @@ void g_np_1() for (i = 0; i < 1024; i++) arr[i] = i; - #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copy(res) + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) { #pragma acc loop gang reduction(+:res) for (i = 0; i < 1024; i++) @@ -28,8 +27,7 @@ void g_np_1() res = hres = 1; - #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copy(res) + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) { #pragma acc loop gang reduction(*:res) for (i = 0; i < 12; i++) @@ -53,8 +51,7 @@ void gv_np_1() for (i = 0; i < 1024; i++) arr[i] = i; - #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copy(res) + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) { #pragma acc loop gang vector reduction(+:res) for (i = 0; i < 1024; i++) @@ -78,8 +75,7 @@ void gw_np_1() for (i = 0; i < 1024; i++) arr[i] = i; - #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copy(res) + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) { #pragma acc loop gang worker reduction(+:res) for (i = 0; i < 1024; i++) @@ -103,8 +99,7 @@ void gwv_np_1() for (i = 0; i < 1024; i++) arr[i] = i; - #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copy(res) + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) { #pragma acc loop gang worker vector reduction(+:res) for (i = 0; i < 1024; i++) @@ -128,8 +123,7 @@ void gwv_np_2() for (i = 0; i < 32768; i++) arr[i] = i; - #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copy(res) + #pragma acc 
parallel num_gangs(32) num_workers(32) vector_length(32) { #pragma acc loop gang reduction(+:res) for (j = 0; j < 32; j++) @@ -161,7 +155,7 @@ void gwv_np_3() arr[i] = i; #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copyin(arr) copy(res) + copyin(arr) { #pragma acc loop gang reduction(+:res) for (j = 0; j < 32; j++) @@ -191,8 +185,7 @@ void gwv_np_4() for (i = 0; i < 32768; i++) arr[i] = i; - #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ - copy(res, mres) + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) { #pragma acc loop gang reduction(+:res) reduction(max:mres) for (j = 0; j < 32; j++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c index 9c8e825df52..aa19d490d17 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c @@ -22,7 +22,7 @@ vector (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel vector_length(32) copyin(ary[0:N]) { #pragma acc loop vector reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) @@ -46,7 +46,7 @@ worker (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel num_workers(32) copyin(ary[0:N]) { #pragma acc loop worker reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) @@ -70,7 +70,7 @@ gang (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel num_gangs (32) copyin(ary[0:N]) { #pragma acc loop gang reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) diff --git 
a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c index 46bb70f27e0..5c533f25109 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c @@ -22,7 +22,7 @@ vector (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel vector_length(32) copyin(ary[0:N]) { #pragma acc loop vector reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) @@ -46,7 +46,7 @@ worker (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel num_workers(32) copyin(ary[0:N]) { #pragma acc loop worker reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) @@ -70,7 +70,7 @@ gang (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel num_gangs (32) copyin(ary[0:N]) { #pragma acc loop gang reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c index 430b1993126..987c4ccba5f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c @@ -19,7 +19,7 @@ vector (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel vector_length(32) copyin(ary[0:N]) { #pragma acc loop vector reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) @@ -43,7 +43,7 @@ worker (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel num_workers(32) 
copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel num_workers(32) copyin(ary[0:N]) { #pragma acc loop worker reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) @@ -67,7 +67,7 @@ gang (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel num_gangs (32) copyin(ary[0:N]) { #pragma acc loop gang reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-flt.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-flt.c index e6947fa5090..f08650a83a8 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-flt.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-flt.c @@ -19,7 +19,7 @@ vector (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel vector_length(32) copyin(ary[0:N]) { #pragma acc loop vector reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) @@ -43,7 +43,7 @@ worker (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel num_workers(32) copyin(ary[0:N]) { #pragma acc loop worker reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) @@ -67,7 +67,7 @@ gang (Type ary[N], Type sum, Type prod) { Type tsum = 0, tprod = 1; -#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod) +#pragma acc parallel num_gangs (32) copyin(ary[0:N]) { #pragma acc loop gang reduction(+:tsum) reduction (*:tprod) for (int ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 index f3ed27527f5..c10e3f97a6f 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 +++ 
b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 @@ -19,7 +19,7 @@ program reduction hs1 = 0 hs2 = 0 - !$acc parallel num_gangs (1000) copy(gs1, gs2) + !$acc parallel num_gangs (1000) !$acc loop reduction(+:gs1, gs2) gang do i = 1, n gs1 = gs1 + 1 @@ -27,7 +27,7 @@ program reduction end do !$acc end parallel - !$acc parallel num_workers (4) vector_length (32) copy(ws1, ws2) + !$acc parallel num_workers (4) vector_length (32) !$acc loop reduction(+:ws1, ws2) worker do i = 1, n ws1 = ws1 + 1 @@ -35,7 +35,7 @@ program reduction end do !$acc end parallel - !$acc parallel vector_length (32) copy(vs1, vs2) + !$acc parallel vector_length (32) !$acc loop reduction(+:vs1, vs2) vector do i = 1, n vs1 = vs1 + 1 @@ -43,7 +43,7 @@ program reduction end do !$acc end parallel - !$acc parallel num_gangs(8) num_workers(4) vector_length(32) copy(cs1, cs2) + !$acc parallel num_gangs(8) num_workers(4) vector_length(32) !$acc loop reduction(+:cs1, cs2) gang worker vector do i = 1, n cs1 = cs1 + 1 @@ -74,7 +74,7 @@ program reduction red = 0 vred = 0 - !$acc parallel num_gangs(10) vector_length(32) copy(red) + !$acc parallel num_gangs(10) vector_length(32) !$acc loop reduction(+:red) gang do i = 1, n/chunksize !$acc loop reduction(+:red) vector diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 index 8ec36adf1e3..1a7d69aa521 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 @@ -50,7 +50,7 @@ subroutine redsub_private(sum, n, arr) end subroutine redsub_private -! Bogus reduction on an impliclitly firstprivate variable. The results do +! Bogus reduction on a firstprivate variable. The results do ! survive the parallel region. The goal here is to ensure that gfortran ! doesn't ICE. 
@@ -58,7 +58,7 @@ subroutine redsub_bogus(sum, n) integer :: sum, n, arr(n) integer :: i - !$acc parallel + !$acc parallel firstprivate(sum) !$acc loop gang worker vector reduction (+:sum) do i = 1, n sum = sum + 1 @@ -72,7 +72,7 @@ subroutine redsub_combined(sum, n, arr) integer :: sum, n, arr(n) integer :: i, j - !$acc parallel copy (arr) + !$acc parallel copy (arr) firstprivate(sum) !$acc loop gang do i = 1, n sum = i; -- 2.30.2