+2016-08-18 Chung-Lin Tang <cltang@codesourcery.com>
+
+ PR middle-end/70895
+ * gimplify.c (omp_add_variable): Adjust/add variable mapping on
+ enclosing parallel construct for reduction variables on OpenACC loop
+ directives.
+
2016-08-18 Pierre-Marie de Rodat <derodat@adacore.com>
* dwarf2out.c (copy_dwarf_procedure): Remove obsolete comment.
n->value |= flags;
else
splay_tree_insert (ctx->variables, (splay_tree_key)decl, flags);
+
+ /* For reduction clauses in OpenACC loop directives, by default create a
+ copy clause on the enclosing parallel construct for carrying back the
+ results. */
+ if (ctx->region_type == ORT_ACC && (flags & GOVD_REDUCTION))
+ {
+ struct gimplify_omp_ctx *outer_ctx = ctx->outer_context;
+ while (outer_ctx)
+ {
+ n = splay_tree_lookup (outer_ctx->variables, (splay_tree_key)decl);
+ if (n != NULL)
+ {
+ /* Ignore local variables and explicitly declared clauses. */
+ if (n->value & (GOVD_LOCAL | GOVD_EXPLICIT))
+ break;
+ else if (outer_ctx->region_type == ORT_ACC_KERNELS)
+ {
+ /* According to the OpenACC spec, such a reduction variable
+ should already have a copy map on a kernels construct;
+ verify that here. */
+ gcc_assert (!(n->value & GOVD_FIRSTPRIVATE)
+ && (n->value & GOVD_MAP));
+ }
+ else if (outer_ctx->region_type == ORT_ACC_PARALLEL)
+ {
+ /* Remove firstprivate and make it a copy map. */
+ n->value &= ~GOVD_FIRSTPRIVATE;
+ n->value |= GOVD_MAP;
+ }
+ }
+ else if (outer_ctx->region_type == ORT_ACC_PARALLEL)
+ {
+ splay_tree_insert (outer_ctx->variables, (splay_tree_key)decl,
+ GOVD_MAP | GOVD_SEEN);
+ break;
+ }
+ outer_ctx = outer_ctx->outer_context;
+ }
+ }
}
/* Notice a threadprivate variable DECL used in OMP context CTX.
+2016-08-18 Chung-Lin Tang <cltang@codesourcery.com>
+
+ PR middle-end/70895
+ * gfortran.dg/goacc/loop-tree-1.f90: Add gimple scan-tree-dump test.
+ * c-c++-common/goacc/reduction-1.c: Likewise.
+ * c-c++-common/goacc/reduction-2.c: Likewise.
+ * c-c++-common/goacc/reduction-3.c: Likewise.
+ * c-c++-common/goacc/reduction-4.c: Likewise.
+
2016-08-18 Alan Modra <amodra@gmail.com>
* gcc.c-torture/compile/pr72771.c: New.
+/* { dg-additional-options "-fdump-tree-gimple" } */
/* Integer reductions. */
#define n 1000
return 0;
}
+
+/* Check that default copy maps are generated for loop reductions. */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 7 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
+/* { dg-additional-options "-fdump-tree-gimple" } */
/* float reductions. */
#define n 1000
return 0;
}
+
+/* Check that default copy maps are generated for loop reductions. */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 4 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
+/* { dg-additional-options "-fdump-tree-gimple" } */
/* double reductions. */
#define n 1000
return 0;
}
+
+/* Check that default copy maps are generated for loop reductions. */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 4 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
+/* { dg-additional-options "-fdump-tree-gimple" } */
/* complex reductions. */
#define n 1000
return 0;
}
+
+/* Check that default copy maps are generated for loop reductions. */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:result \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "map\\(tofrom:lresult \\\[len: \[0-9\]+\\\]\\)" 2 "gimple" } } */
-! { dg-additional-options "-fdump-tree-original -std=f2008" }
+! { dg-additional-options "-fdump-tree-original -fdump-tree-gimple -std=f2008" }
! test for tree-dump-original and spaces-commas
! { dg-final { scan-tree-dump-times "private\\(m\\)" 1 "original" } }
! { dg-final { scan-tree-dump-times "reduction\\(\\+:sum\\)" 1 "original" } }
+! { dg-final { scan-tree-dump-times "map\\(tofrom:sum \\\[len: \[0-9\]+\\\]\\)" 1 "gimple" } }
+2016-08-18 Chung-Lin Tang <cltang@codesourcery.com>
+
+ PR middle-end/70895
+ * testsuite/libgomp.oacc-fortran/reduction-7.f90: Add explicit
+ firstprivate clauses.
+ * testsuite/libgomp.oacc-fortran/reduction-6.f90: Remove explicit
+ copy clauses.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-flt.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/collapse-2.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/collapse-4.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/reduction-dbl.c: Likewise.
+
2016-08-14 Chung-Lin Tang <cltang@codesourcery.com>
PR fortran/70598
int i, j, k, l = 0, f = 0, x = 0;
int m1 = 4, m2 = -5, m3 = 17;
-#pragma acc parallel copy(l)
+#pragma acc parallel
#pragma acc loop seq collapse(3) reduction(+:l)
for (i = -2; i < m1; i++)
for (j = m2; j < -2; j++)
memset (b, '\0', sizeof (b));
-#pragma acc parallel copy(b[0:3][0:3]) copy(l)
+#pragma acc parallel copy(b[0:3][0:3])
{
#pragma acc loop collapse(2) reduction(+:l)
for (i = 0; i < 2; i++)
int ondev = 0;
int t = 0, h = 0;
-#pragma acc parallel num_gangs(32) vector_length(32) copy(t) copy(ondev)
+#pragma acc parallel num_gangs(32) vector_length(32) copy(ondev)
{
#pragma acc loop gang reduction (+:t)
for (unsigned ix = 0; ix < N; ix++)
int ondev = 0;
int t = 0, h = 0;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(t) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev)
{
#pragma acc loop gang worker vector reduction(+:t)
for (unsigned ix = 0; ix < N; ix++)
int ondev = 0;
int t = 0, h = 0;
-#pragma acc parallel vector_length(32) copy(t) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ondev)
{
#pragma acc loop vector reduction (+:t)
for (unsigned ix = 0; ix < N; ix++)
int ondev = 0;
int t = 0, h = 0;
-#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
{
#pragma acc loop worker reduction(+:t)
for (unsigned ix = 0; ix < N; ix++)
int ondev = 0;
int t = 0, h = 0;
-#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
{
#pragma acc loop worker vector reduction (+:t)
for (unsigned ix = 0; ix < N; ix++)
for (i = 0; i < 1024; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang reduction(+:res)
for (i = 0; i < 1024; i++)
res = hres = 1;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang reduction(*:res)
for (i = 0; i < 12; i++)
for (i = 0; i < 1024; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang vector reduction(+:res)
for (i = 0; i < 1024; i++)
for (i = 0; i < 1024; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang worker reduction(+:res)
for (i = 0; i < 1024; i++)
for (i = 0; i < 1024; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang worker vector reduction(+:res)
for (i = 0; i < 1024; i++)
for (i = 0; i < 32768; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang reduction(+:res)
for (j = 0; j < 32; j++)
arr[i] = i;
#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copyin(arr) copy(res)
+ copyin(arr)
{
#pragma acc loop gang reduction(+:res)
for (j = 0; j < 32; j++)
for (i = 0; i < 32768; i++)
arr[i] = i;
- #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
- copy(res, mres)
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32)
{
#pragma acc loop gang reduction(+:res) reduction(max:mres)
for (j = 0; j < 32; j++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel vector_length(32) copyin(ary[0:N])
{
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_workers(32) copyin(ary[0:N])
{
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_gangs (32) copyin(ary[0:N])
{
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel vector_length(32) copyin(ary[0:N])
{
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_workers(32) copyin(ary[0:N])
{
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_gangs (32) copyin(ary[0:N])
{
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel vector_length(32) copyin(ary[0:N])
{
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_workers(32) copyin(ary[0:N])
{
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_gangs (32) copyin(ary[0:N])
{
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel vector_length(32) copyin(ary[0:N])
{
#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_workers(32) copyin(ary[0:N])
{
#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
{
Type tsum = 0, tprod = 1;
-#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+#pragma acc parallel num_gangs (32) copyin(ary[0:N])
{
#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
for (int ix = 0; ix < N; ix++)
hs1 = 0
hs2 = 0
- !$acc parallel num_gangs (1000) copy(gs1, gs2)
+ !$acc parallel num_gangs (1000)
!$acc loop reduction(+:gs1, gs2) gang
do i = 1, n
gs1 = gs1 + 1
end do
!$acc end parallel
- !$acc parallel num_workers (4) vector_length (32) copy(ws1, ws2)
+ !$acc parallel num_workers (4) vector_length (32)
!$acc loop reduction(+:ws1, ws2) worker
do i = 1, n
ws1 = ws1 + 1
end do
!$acc end parallel
- !$acc parallel vector_length (32) copy(vs1, vs2)
+ !$acc parallel vector_length (32)
!$acc loop reduction(+:vs1, vs2) vector
do i = 1, n
vs1 = vs1 + 1
end do
!$acc end parallel
- !$acc parallel num_gangs(8) num_workers(4) vector_length(32) copy(cs1, cs2)
+ !$acc parallel num_gangs(8) num_workers(4) vector_length(32)
!$acc loop reduction(+:cs1, cs2) gang worker vector
do i = 1, n
cs1 = cs1 + 1
red = 0
vred = 0
- !$acc parallel num_gangs(10) vector_length(32) copy(red)
+ !$acc parallel num_gangs(10) vector_length(32)
!$acc loop reduction(+:red) gang
do i = 1, n/chunksize
!$acc loop reduction(+:red) vector
end subroutine redsub_private
-! Bogus reduction on an impliclitly firstprivate variable. The results do
+! Bogus reduction on a firstprivate variable. The results do
! survive the parallel region. The goal here is to ensure that gfortran
! doesn't ICE.
integer :: sum, n, arr(n)
integer :: i
- !$acc parallel
+ !$acc parallel firstprivate(sum)
!$acc loop gang worker vector reduction (+:sum)
do i = 1, n
sum = sum + 1
integer :: sum, n, arr(n)
integer :: i, j
- !$acc parallel copy (arr)
+ !$acc parallel copy (arr) firstprivate(sum)
!$acc loop gang
do i = 1, n
sum = i;