2016-06-10 Thomas Schwinge <thomas@codesourcery.com>
+ PR middle-end/71373
+ * tree-nested.c (convert_nonlocal_omp_clauses)
+ (convert_local_omp_clauses): Handle OMP_CLAUSE_ASYNC,
+ OMP_CLAUSE_WAIT, OMP_CLAUSE_INDEPENDENT, OMP_CLAUSE_AUTO,
+ OMP_CLAUSE__CACHE_, OMP_CLAUSE_TILE.
+
+ * gimplify.c (gimplify_adjust_omp_clauses): Discard
+ OMP_CLAUSE_TILE.
+ * omp-low.c (scan_sharing_clauses): Don't expect OMP_CLAUSE_TILE.
+
* omp-low.c (scan_sharing_clauses): Don't expect
OMP_CLAUSE__CACHE_.
case OMP_CLAUSE_VECTOR:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
+ break;
+
case OMP_CLAUSE_TILE:
+ /* We're not yet making use of the information provided by OpenACC
+ tile clauses. Discard these here, to simplify later middle end
+ processing. */
+ remove = true;
break;
default:
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
- case OMP_CLAUSE_TILE:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
install_var_local (decl, ctx);
break;
+ case OMP_CLAUSE_TILE:
case OMP_CLAUSE__CACHE_:
default:
gcc_unreachable ();
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
- case OMP_CLAUSE_TILE:
case OMP_CLAUSE_INDEPENDENT:
case OMP_CLAUSE_AUTO:
case OMP_CLAUSE_SEQ:
case OMP_CLAUSE__GRIDDIM_:
break;
+ case OMP_CLAUSE_TILE:
case OMP_CLAUSE__CACHE_:
default:
gcc_unreachable ();
2016-06-10 Thomas Schwinge <thomas@codesourcery.com>
+ Cesar Philippidis <cesar@codesourcery.com>
+
+ PR middle-end/71373
+ * gcc.dg/goacc/nested-function-1.c: New file.
+ * gcc.dg/goacc/nested-function-2.c: Likewise.
+ * gcc.dg/goacc/pr71373.c: Likewise.
+ * gfortran.dg/goacc/cray-2.f95: Likewise.
+ * gfortran.dg/goacc/loop-1-2.f95: Likewise.
+ * gfortran.dg/goacc/loop-3-2.f95: Likewise.
+ * gfortran.dg/goacc/cray.f95: Update.
+ * gfortran.dg/goacc/loop-1.f95: Likewise.
+ * gfortran.dg/goacc/loop-3.f95: Likewise.
+ * gfortran.dg/goacc/subroutines.f90: Update, and rename to...
+ * gfortran.dg/goacc/nested-function-1.f90: ... this new file.
+
+2016-06-10 Thomas Schwinge <thomas@codesourcery.com>
+
+ * c-c++-common/goacc/combined-directives.c: XFAIL tree scanning
+ for OpenACC tile clauses.
+ * gfortran.dg/goacc/combined-directives.f90: Likewise.
PR c/71381
* c-c++-common/goacc/cache-1.c: Update. Move invalid usage tests
// { dg-final { scan-tree-dump-times "acc loop vector" 2 "gimple" } }
// { dg-final { scan-tree-dump-times "acc loop seq" 2 "gimple" } }
// { dg-final { scan-tree-dump-times "acc loop auto" 2 "gimple" } }
-// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" } }
+// XFAILed: OpenACC tile clauses are discarded during gimplification.
+// { dg-final { scan-tree-dump-times "acc loop tile.2, 3" 2 "gimple" { xfail *-*-* } } }
// { dg-final { scan-tree-dump-times "acc loop independent private.i" 2 "gimple" } }
// { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
--- /dev/null
+/* Exercise nested function decomposition, gcc/tree-nested.c. */
+/* See gcc/testsuite/gfortran.dg/goacc/nested-function-1.f90 for the Fortran
+ version. */
+
+int main ()
+{
+#define N 100
+ int nonlocal_arg;
+ int nonlocal_a[N];
+ int nonlocal_i;
+ int nonlocal_j;
+
+ for (int i = 0; i < N; ++i)
+ nonlocal_a[i] = 5;
+ nonlocal_arg = 5;
+
+ void local ()
+ {
+ int local_i;
+ int local_arg;
+ int local_a[N];
+ int local_j;
+
+ for (int i = 0; i < N; ++i)
+ local_a[i] = 5;
+ local_arg = 5;
+
+#pragma acc kernels loop \
+ gang(num:local_arg) worker(local_arg) vector(local_arg) \
+ wait async(local_arg)
+ for (local_i = 0; local_i < N; ++local_i)
+ {
+#pragma acc cache (local_a[local_i:5])
+ local_a[local_i] = 100;
+#pragma acc loop seq tile(*)
+ for (local_j = 0; local_j < N; ++local_j)
+ ;
+#pragma acc loop auto independent tile(1)
+ for (local_j = 0; local_j < N; ++local_j)
+ ;
+ }
+
+#pragma acc kernels loop \
+ gang(static:local_arg) worker(local_arg) vector(local_arg) \
+ wait(local_arg, local_arg + 1, local_arg + 2) async
+ for (local_i = 0; local_i < N; ++local_i)
+ {
+#pragma acc cache (local_a[local_i:4])
+ local_a[local_i] = 100;
+#pragma acc loop seq tile(1)
+ for (local_j = 0; local_j < N; ++local_j)
+ ;
+#pragma acc loop auto independent tile(*)
+ for (local_j = 0; local_j < N; ++local_j)
+ ;
+ }
+ }
+
+ void nonlocal ()
+ {
+ for (int i = 0; i < N; ++i)
+ nonlocal_a[i] = 5;
+ nonlocal_arg = 5;
+
+#pragma acc kernels loop \
+ gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+ wait async(nonlocal_arg)
+ for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+ {
+#pragma acc cache (nonlocal_a[nonlocal_i:3])
+ nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(2)
+ for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+ ;
+#pragma acc loop auto independent tile(3)
+ for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+ ;
+ }
+
+#pragma acc kernels loop \
+ gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) \
+ wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+ for (nonlocal_i = 0; nonlocal_i < N; ++nonlocal_i)
+ {
+#pragma acc cache (nonlocal_a[nonlocal_i:2])
+ nonlocal_a[nonlocal_i] = 100;
+#pragma acc loop seq tile(*)
+ for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+ ;
+#pragma acc loop auto independent tile(*)
+ for (nonlocal_j = 0; nonlocal_j < N; ++nonlocal_j)
+ ;
+ }
+ }
+
+ local ();
+ nonlocal ();
+
+ return 0;
+}
--- /dev/null
+/* Exercise nested function decomposition, gcc/tree-nested.c. */
+
+int
+main (void)
+{
+ int j = 0, k = 6, l = 7, m = 8;
+ void simple (void)
+ {
+ int i;
+#pragma acc parallel
+ {
+#pragma acc loop
+ for (i = 0; i < m; i+= k)
+ j = (m + i - j) * l;
+ }
+ }
+ void collapse (void)
+ {
+ int x, y, z;
+#pragma acc parallel
+ {
+#pragma acc loop collapse (3)
+ for (x = 0; x < k; x++)
+ for (y = -5; y < l; y++)
+ for (z = 0; z < m; z++)
+ j += x + y + z;
+ }
+ }
+ void reduction (void)
+ {
+ int x, y, z;
+#pragma acc parallel reduction (+:j)
+ {
+#pragma acc loop reduction (+:j) collapse (3)
+ for (x = 0; x < k; x++)
+ for (y = -5; y < l; y++)
+ for (z = 0; z < m; z++)
+ j += x + y + z;
+ }
+ }
+ simple();
+ collapse();
+ reduction();
+ return 0;
+}
--- /dev/null
+/* Unintentional nested function usage. */
+/* Due to missing right braces '}', the following functions are parsed as
+ nested functions. This ran into an ICE. */
+
+void foo (void)
+{
+ #pragma acc parallel
+ {
+ #pragma acc loop independent
+ for (int i = 0; i < 16; i++)
+ ;
+ // Note right brace '}' commented out here.
+ //}
+}
+void bar (void)
+{
+}
+
+// Adding right brace '}' here, to make this compile.
+}
+
+
+// ..., and the other way round:
+
+void BAR (void)
+{
+// Note right brace '}' commented out here.
+//}
+
+void FOO (void)
+{
+ #pragma acc parallel
+ {
+ #pragma acc loop independent
+ for (int i = 0; i < 16; i++)
+ ;
+ }
+}
+
+// Adding right brace '}' here, to make this compile.
+}
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. vector" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. seq" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. auto" 2 "gimple" } }
-! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" } }
+! XFAILed: OpenACC tile clauses are discarded during gimplification.
+! { dg-final { scan-tree-dump-times "acc loop private.i. private.j. tile.2, 3" 2 "gimple" { xfail *-*-* } } }
! { dg-final { scan-tree-dump-times "acc loop private.i. independent" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "private.z" 2 "gimple" } }
! { dg-final { scan-tree-dump-times "omp target oacc_\[^ \]+ map.force_tofrom:y" 2 "gimple" } }
--- /dev/null
+! { dg-additional-options "-fcray-pointer" }
+! See also cray.f95.
+
+program test
+ call oacc1
+contains
+ subroutine oacc1
+ implicit none
+ integer :: i
+ real :: pointee
+ pointer (ptr, pointee)
+ !$acc declare device_resident (pointee)
+ !$acc declare device_resident (ptr)
+ !$acc data copy (pointee) ! { dg-error "Cray pointee" }
+ !$acc end data
+ !$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
+ !$acc end data
+ !$acc parallel private (pointee) ! { dg-error "Cray pointee" }
+ !$acc end parallel
+ !$acc host_data use_device (pointee) ! { dg-error "Cray pointee" }
+ !$acc end host_data
+ !$acc parallel loop reduction(+:pointee) ! { dg-error "Cray pointee" }
+ do i = 1,5
+ enddo
+ !$acc end parallel loop
+ !$acc parallel loop
+ do i = 1,5
+ !$acc cache (pointee) ! { dg-error "Cray pointee" }
+ enddo
+ !$acc end parallel loop
+ !$acc update device (pointee) ! { dg-error "Cray pointee" }
+ !$acc update host (pointee) ! { dg-error "Cray pointee" }
+ !$acc update self (pointee) ! { dg-error "Cray pointee" }
+ !$acc data copy (ptr)
+ !$acc end data
+ !$acc data deviceptr (ptr) ! { dg-error "Cray pointer" }
+ !$acc end data
+ !$acc parallel private (ptr)
+ !$acc end parallel
+ !$acc host_data use_device (ptr) ! { dg-error "Cray pointer" }
+ !$acc end host_data
+ !$acc parallel loop reduction(+:ptr) ! { dg-error "Cray pointer" }
+ do i = 1,5
+ enddo
+ !$acc end parallel loop
+ !$acc parallel loop
+ do i = 1,5
+ !TODO: This must fail, as in openacc-1_0-branch.
+ !$acc cache (ptr) ! { dg-error "" "TODO" { xfail *-*-* } }
+ enddo
+ !$acc end parallel loop
+ !$acc update device (ptr)
+ !$acc update host (ptr)
+ !$acc update self (ptr)
+ end subroutine oacc1
+end program test
-! { dg-do compile }
! { dg-additional-options "-fcray-pointer" }
+! See also cray-2.f95.
module test
contains
integer :: i
real :: pointee
pointer (ptr, pointee)
- !$acc declare device_resident (pointee)
- !$acc declare device_resident (ptr)
+ !$acc declare device_resident (pointee)
+ !$acc declare device_resident (ptr)
!$acc data copy (pointee) ! { dg-error "Cray pointee" }
!$acc end data
!$acc data deviceptr (pointee) ! { dg-error "Cray pointee" }
--- /dev/null
+! See also loop-1.f95.
+
+program test
+ call test1
+contains
+
+subroutine test1
+ integer :: i, j, k, b(10)
+ integer, dimension (30) :: a
+ double precision :: d
+ real :: r
+ i = 0
+ !$acc loop
+ do 100 ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+ if (i .gt. 0) exit ! { dg-error "EXIT statement" }
+ 100 i = i + 1
+ i = 0
+ !$acc loop
+ do ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+ if (i .gt. 0) exit ! { dg-error "EXIT statement" }
+ i = i + 1
+ end do
+ i = 0
+ !$acc loop
+ do 200 while (i .lt. 4) ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+ 200 i = i + 1
+ !$acc loop
+ do while (i .lt. 8) ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+ i = i + 1
+ end do
+ !$acc loop
+ do 300 d = 1, 30, 6
+ i = d
+ 300 a(i) = 1
+ ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+ ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
+ !$acc loop
+ do d = 1, 30, 5
+ i = d
+ a(i) = 2
+ end do
+ ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+ ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
+ !$acc loop
+ do i = 1, 30
+ if (i .eq. 16) exit ! { dg-error "EXIT statement" }
+ end do
+ !$acc loop
+ outer: do i = 1, 30
+ do j = 5, 10
+ if (i .eq. 6 .and. j .eq. 7) exit outer ! { dg-error "EXIT statement" }
+ end do
+ end do outer
+ last: do i = 1, 30
+ end do last
+
+ ! different types of loop are allowed
+ !$acc loop
+ do i = 1,10
+ end do
+ !$acc loop
+ do 400, i = 1,10
+400 a(i) = i
+
+ ! after loop directive must be loop
+ !$acc loop
+ a(1) = 1 ! { dg-error "Expected DO loop" }
+ do i = 1,10
+ enddo
+
+ ! combined directives may be used with/without end
+ !$acc parallel loop
+ do i = 1,10
+ enddo
+ !$acc parallel loop
+ do i = 1,10
+ enddo
+ !$acc end parallel loop
+ !$acc kernels loop
+ do i = 1,10
+ enddo
+ !$acc kernels loop
+ do i = 1,10
+ enddo
+ !$acc end kernels loop
+
+ !$acc kernels loop reduction(max:i)
+ do i = 1,10
+ enddo
+ !$acc kernels
+ !$acc loop reduction(max:i)
+ do i = 1,10
+ enddo
+ !$acc end kernels
+
+ !$acc parallel loop collapse(0) ! { dg-error "constant positive integer" }
+ do i = 1,10
+ enddo
+
+ !$acc parallel loop collapse(-1) ! { dg-error "constant positive integer" }
+ do i = 1,10
+ enddo
+
+ !$acc parallel loop collapse(i) ! { dg-error "Constant expression required" }
+ do i = 1,10
+ enddo
+
+ !$acc parallel loop collapse(4) ! { dg-error "not enough DO loops for collapsed" }
+ do i = 1, 3
+ do j = 4, 6
+ do k = 5, 7
+ a(i+j-k) = i + j + k
+ end do
+ end do
+ end do
+ !$acc parallel loop collapse(2)
+ do i = 1, 5, 2
+ do j = i + 1, 7, i ! { dg-error "collapsed loops don.t form rectangular iteration space" }
+ end do
+ end do
+ !$acc parallel loop collapse(2)
+ do i = 1, 3
+ do j = 4, 6
+ end do
+ end do
+ !$acc parallel loop collapse(2)
+ do i = 1, 3
+ do j = 4, 6
+ end do
+ k = 4
+ end do
+ !$acc parallel loop collapse(3-1)
+ do i = 1, 3
+ do j = 4, 6
+ end do
+ k = 4
+ end do
+ !$acc parallel loop collapse(1+1)
+ do i = 1, 3
+ do j = 4, 6
+ end do
+ k = 4
+ end do
+ !$acc parallel loop collapse(2)
+ do i = 1, 3
+ do ! { dg-error "cannot be a DO WHILE or DO without loop control" }
+ end do
+ end do
+ !$acc parallel loop collapse(2)
+ do i = 1, 3
+ do r = 4, 6
+ end do
+ ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+ ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
+ end do
+
+ ! Both seq and independent are not allowed
+ !$acc loop independent seq ! { dg-error "SEQ conflicts with INDEPENDENT" }
+ do i = 1,10
+ enddo
+
+
+ !$acc cache (a(1:10)) ! { dg-error "ACC CACHE directive must be inside of loop" }
+
+ do i = 1,10
+ !$acc cache(a(i:i+1))
+ enddo
+
+ do i = 1,10
+ !$acc cache(a(i:i+1))
+ a(i) = i
+ !$acc cache(a(i+2:i+2+1))
+ enddo
+
+end subroutine test1
+end program test
+! See also loop-1-2.f95.
+
module test
implicit none
contains
-subroutine test1
+subroutine test1
integer :: i, j, k, b(10)
integer, dimension (30) :: a
double precision :: d
do 300 d = 1, 30, 6
i = d
300 a(i) = 1
- ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 30 }
- ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 30 }
+ ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 32 }
+ ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 32 }
!$acc loop
do d = 1, 30, 5
i = d
a(i) = 2
end do
- ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 36 }
- ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 36 }
+ ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 38 }
+ ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 38 }
!$acc loop
do i = 1, 30
if (i .eq. 16) exit ! { dg-error "EXIT statement" }
end do last
! different types of loop are allowed
- !$acc loop
+ !$acc loop
do i = 1,10
end do
!$acc loop
a(1) = 1 ! { dg-error "Expected DO loop" }
do i = 1,10
enddo
-
- ! combined directives may be used with/without end
+
+ ! combined directives may be used with/without end
!$acc parallel loop
do i = 1,10
enddo
enddo
!$acc end kernels loop
- !$acc kernels loop reduction(max:i)
+ !$acc kernels loop reduction(max:i)
do i = 1,10
enddo
- !$acc kernels
- !$acc loop reduction(max:i)
+ !$acc kernels
+ !$acc loop reduction(max:i)
do i = 1,10
enddo
!$acc end kernels
end do
!$acc parallel loop collapse(2)
do i = 1, 3
- do j = 4, 6
+ do j = 4, 6
end do
end do
!$acc parallel loop collapse(2)
do i = 1, 3
do r = 4, 6
end do
- ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 149 }
- ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 149 }
+ ! { dg-warning "Deleted feature: Loop variable at .1. must be integer" "" { target *-*-* } 151 }
+ ! { dg-error "ACC LOOP iteration variable must be of type integer" "" { target *-*-* } 151 }
end do
! Both seq and independent are not allowed
--- /dev/null
+! { dg-additional-options "-std=f2008" }
+! See also loop-3.f95.
+
+program test
+ call test1
+contains
+subroutine test1
+ implicit none
+ integer :: i, j
+
+ ! !$acc end loop not required by spec
+ !$acc loop
+ do i = 1,5
+ enddo
+ !$acc end loop ! { dg-warning "Redundant" }
+
+ !$acc loop
+ do i = 1,5
+ enddo
+ j = 1
+ !$acc end loop ! { dg-error "Unexpected" }
+
+ !$acc parallel
+ !$acc loop
+ do i = 1,5
+ enddo
+ !$acc end parallel
+ !$acc end loop ! { dg-error "Unexpected" }
+
+ ! OpenACC supports Fortran 2008 do concurrent statement
+ !$acc loop
+ do concurrent (i = 1:5)
+ end do
+
+ !$acc loop
+ outer_loop: do i = 1, 5
+ inner_loop: do j = 1,5
+ if (i .eq. j) cycle outer_loop
+ if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
+ end do inner_loop
+ end do outer_loop
+
+ outer_loop1: do i = 1, 5
+ !$acc loop
+ inner_loop1: do j = 1,5
+ if (i .eq. j) cycle outer_loop1 ! { dg-error "CYCLE statement" }
+ end do inner_loop1
+ end do outer_loop1
+
+ !$acc loop collapse(2)
+ outer_loop2: do i = 1, 5
+ inner_loop2: do j = 1,5
+ if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
+ if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
+ end do inner_loop2
+ end do outer_loop2
+end subroutine test1
+end program test
-! { dg-do compile }
! { dg-additional-options "-std=f2008" }
+! See also loop-3-2.f95.
subroutine test1
implicit none
integer :: i, j
-
+
! !$acc end loop not required by spec
!$acc loop
do i = 1,5
enddo
!$acc end parallel
!$acc end loop ! { dg-error "Unexpected" }
-
+
! OpenACC supports Fortran 2008 do concurrent statement
!$acc loop
do concurrent (i = 1:5)
if (i .eq. j) cycle outer_loop
if (i .ne. j) exit outer_loop ! { dg-error "EXIT statement" }
end do inner_loop
- end do outer_loop
+ end do outer_loop
outer_loop1: do i = 1, 5
!$acc loop
if (i .eq. j) cycle outer_loop2 ! { dg-error "CYCLE statement" }
if (i .ne. j) exit outer_loop2 ! { dg-error "EXIT statement" }
end do inner_loop2
- end do outer_loop2
+ end do outer_loop2
end subroutine test1
-
--- /dev/null
+! Exercise nested function decomposition, gcc/tree-nested.c.
+! See gcc/testsuite/gcc.dg/goacc/nested-function-1.c for the C version.
+
+program main
+ integer, parameter :: N = 100
+ integer :: nonlocal_arg
+ integer :: nonlocal_a(N)
+ integer :: nonlocal_i
+ integer :: nonlocal_j
+
+ nonlocal_a (:) = 5
+ nonlocal_arg = 5
+
+ call local ()
+ call nonlocal ()
+
+contains
+
+ subroutine local ()
+ integer :: local_i
+ integer :: local_arg
+ integer :: local_a(N)
+ integer :: local_j
+
+ local_a (:) = 5
+ local_arg = 5
+
+ !$acc kernels loop &
+ !$acc gang(num:local_arg) worker(local_arg) vector(local_arg) &
+ !$acc wait async(local_arg)
+ do local_i = 1, N
+ !$acc cache (local_a(local_i:local_i + 5))
+ local_a(local_i) = 100
+ !$acc loop seq tile(*)
+ do local_j = 1, N
+ enddo
+ !$acc loop auto independent tile(1)
+ do local_j = 1, N
+ enddo
+ enddo
+ !$acc end kernels loop
+
+ !$acc kernels loop &
+ !$acc gang(static:local_arg) worker(local_arg) vector(local_arg) &
+ !$acc wait(local_arg, local_arg + 1, local_arg + 2) async
+ do local_i = 1, N
+ !$acc cache (local_a(local_i:local_i + 4))
+ local_a(local_i) = 100
+ !$acc loop seq tile(1)
+ do local_j = 1, N
+ enddo
+ !$acc loop auto independent tile(*)
+ do local_j = 1, N
+ enddo
+ enddo
+ !$acc end kernels loop
+ end subroutine local
+
+ subroutine nonlocal ()
+ nonlocal_a (:) = 5
+ nonlocal_arg = 5
+
+ !$acc kernels loop &
+ !$acc gang(num:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+ !$acc wait async(nonlocal_arg)
+ do nonlocal_i = 1, N
+ !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 3))
+ nonlocal_a(nonlocal_i) = 100
+ !$acc loop seq tile(2)
+ do nonlocal_j = 1, N
+ enddo
+ !$acc loop auto independent tile(3)
+ do nonlocal_j = 1, N
+ enddo
+ enddo
+ !$acc end kernels loop
+
+ !$acc kernels loop &
+ !$acc gang(static:nonlocal_arg) worker(nonlocal_arg) vector(nonlocal_arg) &
+ !$acc wait(nonlocal_arg, nonlocal_arg + 1, nonlocal_arg + 2) async
+ do nonlocal_i = 1, N
+ !$acc cache (nonlocal_a(nonlocal_i:nonlocal_i + 2))
+ nonlocal_a(nonlocal_i) = 100
+ !$acc loop seq tile(*)
+ do nonlocal_j = 1, N
+ enddo
+ !$acc loop auto independent tile(*)
+ do nonlocal_j = 1, N
+ enddo
+ enddo
+ !$acc end kernels loop
+ end subroutine nonlocal
+end program main
+++ /dev/null
-! Exercise how tree-nested.c handles gang, worker vector and seq.
-
-! { dg-do compile }
-
-program main
- integer, parameter :: N = 100
- integer :: nonlocal_arg
- integer :: nonlocal_a(N)
- integer :: nonlocal_i
- integer :: nonlocal_j
-
- nonlocal_a (:) = 5
- nonlocal_arg = 5
-
- call local ()
- call nonlocal ()
-
-contains
-
- subroutine local ()
- integer :: local_i
- integer :: local_arg
- integer :: local_a(N)
- integer :: local_j
-
- local_a (:) = 5
- local_arg = 5
-
- !$acc kernels loop gang(num:local_arg) worker(local_arg) vector(local_arg)
- do local_i = 1, N
- local_a(local_i) = 100
- !$acc loop seq
- do local_j = 1, N
- enddo
- enddo
- !$acc end kernels loop
-
- !$acc kernels loop gang(static:local_arg) worker(local_arg) &
- !$acc vector(local_arg)
- do local_i = 1, N
- local_a(local_i) = 100
- !$acc loop seq
- do local_j = 1, N
- enddo
- enddo
- !$acc end kernels loop
- end subroutine local
-
- subroutine nonlocal ()
- nonlocal_a (:) = 5
- nonlocal_arg = 5
-
- !$acc kernels loop gang(num:nonlocal_arg) worker(nonlocal_arg) &
- !$acc vector(nonlocal_arg)
- do nonlocal_i = 1, N
- nonlocal_a(nonlocal_i) = 100
- !$acc loop seq
- do nonlocal_j = 1, N
- enddo
- enddo
- !$acc end kernels loop
-
- !$acc kernels loop gang(static:nonlocal_arg) worker(nonlocal_arg) &
- !$acc vector(nonlocal_arg)
- do nonlocal_i = 1, N
- nonlocal_a(nonlocal_i) = 100
- !$acc loop seq
- do nonlocal_j = 1, N
- enddo
- enddo
- !$acc end kernels loop
- end subroutine nonlocal
-end program main
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
+ case OMP_CLAUSE_ASYNC:
+ case OMP_CLAUSE_WAIT:
/* Several OpenACC clauses have optional arguments. Check if they
are present. */
if (OMP_CLAUSE_OPERAND (clause, 0))
case OMP_CLAUSE_SIMD:
case OMP_CLAUSE_DEFAULTMAP:
case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_AUTO:
break;
+ case OMP_CLAUSE_TILE:
+ /* OpenACC tile clauses are discarded during gimplification, so we
+ don't expect to see anything here. */
+ gcc_unreachable ();
+
+ case OMP_CLAUSE__CACHE_:
+ /* These clauses belong to the OpenACC cache directive, which is
+ discarded during gimplification, so we don't expect to see
+ anything here. */
+ gcc_unreachable ();
+
default:
gcc_unreachable ();
}
case OMP_CLAUSE_GANG:
case OMP_CLAUSE_WORKER:
case OMP_CLAUSE_VECTOR:
+ case OMP_CLAUSE_ASYNC:
+ case OMP_CLAUSE_WAIT:
/* Several OpenACC clauses have optional arguments. Check if they
are present. */
if (OMP_CLAUSE_OPERAND (clause, 0))
case OMP_CLAUSE_SIMD:
case OMP_CLAUSE_DEFAULTMAP:
case OMP_CLAUSE_SEQ:
+ case OMP_CLAUSE_INDEPENDENT:
+ case OMP_CLAUSE_AUTO:
break;
+ case OMP_CLAUSE_TILE:
+ /* OpenACC tile clauses are discarded during gimplification, so we
+ don't expect to see anything here. */
+ gcc_unreachable ();
+
+ case OMP_CLAUSE__CACHE_:
+ /* These clauses belong to the OpenACC cache directive, which is
+ discarded during gimplification, so we don't expect to see
+ anything here. */
+ gcc_unreachable ();
+
default:
gcc_unreachable ();
}
+2016-06-10 Thomas Schwinge <thomas@codesourcery.com>
+ Cesar Philippidis <cesar@codesourcery.com>
+
+ PR middle-end/71373
+ * libgomp.oacc-c/nested-function-1.c: New file.
+ * libgomp.oacc-c/nested-function-2.c: Likewise.
+ * libgomp.oacc-fortran/nested-function-1.f90: Likewise.
+ * libgomp.oacc-fortran/nested-function-2.f90: Likewise.
+ * libgomp.oacc-fortran/nested-function-3.f90: Likewise.
+
2016-06-10 Thomas Schwinge <thomas@codesourcery.com>
PR c/71381
--- /dev/null
+/* Exercise nested function decomposition, gcc/tree-nested.c. */
+
+int
+main (void)
+{
+ void test1 ()
+ {
+ int i, j, k;
+ int a[4][7][8];
+
+ __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(4 - 1)
+ for (i = 1; i <= 3; i++)
+ for (j = 4; j <= 6; j++)
+ for (k = 5; k <= 7; k++)
+ a[i][j][k] = i + j + k;
+
+ for (i = 1; i <= 3; i++)
+ for (j = 4; j <= 6; j++)
+ for (k = 5; k <= 7; k++)
+ if (a[i][j][k] != i + j + k)
+ __builtin_abort();
+ }
+
+ void test2 ()
+ {
+ int i, j, k;
+ int a[4][4][4];
+
+ __builtin_memset (a, 0, sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+ for (i = 1; i <= 3; i++)
+ for (j = 1; j <= 3; j++)
+ for (k = 1; k <= 3; k++)
+ a[i][j][k] = 1;
+
+ for (i = 1; i <= 3; i++)
+ for (j = 1; j <= 3; j++)
+ for (k = 1; k <= 3; k++)
+ if (a[i][j][k] != 1)
+ __builtin_abort ();
+ }
+
+ test1 ();
+ test2 ();
+
+ return 0;
+}
--- /dev/null
+/* Exercise nested function decomposition, gcc/tree-nested.c. */
+
+int
+main (void)
+{
+ int p1 = 2, p2 = 6, p3 = 0, p4 = 4, p5 = 13, p6 = 18, p7 = 1, p8 = 1, p9 = 1;
+
+ void test1 ()
+ {
+ int i, j, k;
+ int a[4][4][4];
+
+ __builtin_memset (a, '\0', sizeof (a));
+
+#pragma acc parallel
+#pragma acc loop collapse(3)
+ for (i = 1; i <= 3; i++)
+ for (j = 1; j <= 3; j++)
+ for (k = 2; k <= 3; k++)
+ a[i][j][k] = 1;
+
+ for (i = 1; i <= 3; i++)
+ for (j = 1; j <= 3; j++)
+ for (k = 2; k <= 3; k++)
+ if (a[i][j][k] != 1)
+ __builtin_abort();
+ }
+
+ void test2 (int v1, int v2, int v3, int v4, int v5, int v6)
+ {
+ int i, j, k, l = 0, r = 0;
+ int a[7][5][19];
+ int b[7][5][19];
+
+ __builtin_memset (a, '\0', sizeof (a));
+ __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+ for (i = v1; i <= v2; i++)
+ for (j = v3; j <= v4; j++)
+ for (k = v5; k <= v6; k++)
+ {
+ l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+ if (!l)
+ a[i][j][k] += 1;
+ }
+
+ for (i = v1; i <= v2; i++)
+ for (j = v3; j <= v4; j++)
+ for (k = v5; k <= v6; k++)
+ {
+ r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+ if (!r)
+ b[i][j][k] += 1;
+ }
+
+ if (l != r)
+ __builtin_abort ();
+
+ for (i = v1; i <= v2; i++)
+ for (j = v3; j <= v4; j++)
+ for (k = v5; k <= v6; k++)
+ if (b[i][j][k] != a[i][j][k])
+ __builtin_abort ();
+ }
+
+ void test3 (int v1, int v2, int v3, int v4, int v5, int v6, int v7, int v8,
+ int v9)
+ {
+ int i, j, k, l = 0, r = 0;
+ int a[7][5][19];
+ int b[7][5][19];
+
+ __builtin_memset (a, '\0', sizeof (a));
+ __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+ for (i = v1; i <= v2; i += v7)
+ for (j = v3; j <= v4; j += v8)
+ for (k = v5; k <= v6; k += v9)
+ {
+ l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+ if (!l)
+ a[i][j][k] += 1;
+ }
+
+ for (i = v1; i <= v2; i += v7)
+ for (j = v3; j <= v4; j += v8)
+ for (k = v5; k <= v6; k += v9)
+ {
+ r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+ if (!r)
+ b[i][j][k] += 1;
+ }
+
+ if (l != r)
+ __builtin_abort ();
+
+ for (i = v1; i <= v2; i++)
+ for (j = v3; j <= v4; j++)
+ for (k = v5; k <= v6; k++)
+ if (b[i][j][k] != a[i][j][k])
+ __builtin_abort ();
+ }
+
+ void test4 ()
+ {
+ int i, j, k, l = 0, r = 0;
+ int a[7][5][19];
+ int b[7][5][19];
+ int v1 = p1, v2 = p2, v3 = p3, v4 = p4, v5 = p5, v6 = p6, v7 = p7, v8 = p8,
+ v9 = p9;
+
+ __builtin_memset (a, '\0', sizeof (a));
+ __builtin_memset (b, '\0', sizeof (b));
+
+#pragma acc parallel reduction (||:l)
+#pragma acc loop reduction (||:l) collapse(3)
+ for (i = v1; i <= v2; i += v7)
+ for (j = v3; j <= v4; j += v8)
+ for (k = v5; k <= v6; k += v9)
+ {
+ l = l || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+ if (!l)
+ a[i][j][k] += 1;
+ }
+
+ for (i = v1; i <= v2; i += v7)
+ for (j = v3; j <= v4; j += v8)
+ for (k = v5; k <= v6; k += v9)
+ {
+ r = r || i < 2 || i > 6 || j < 0 || j > 4 || k < 13 || k > 18;
+ if (!r)
+ b[i][j][k] += 1;
+ }
+
+ if (l != r)
+ __builtin_abort ();
+
+ for (i = v1; i <= v2; i++)
+ for (j = v3; j <= v4; j++)
+ for (k = v5; k <= v6; k++)
+ if (b[i][j][k] != a[i][j][k])
+ __builtin_abort ();
+ }
+
+ test1 ();
+ test2 (p1, p2, p3, p4, p5, p6);
+ test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9);
+ test4 ();
+
+ return 0;
+}
--- /dev/null
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse2
+ call test1
+ call test2
+contains
+ subroutine test1
+ integer :: i, j, k, a(1:3, 4:6, 5:7)
+ logical :: l
+ l = .false.
+ a(:, :, :) = 0
+ !$acc parallel reduction (.or.:l)
+ !$acc loop worker vector collapse(4 - 1)
+ do 164 i = 1, 3
+ do 164 j = 4, 6
+ do 164 k = 5, 7
+ a(i, j, k) = i + j + k
+164 end do
+ !$acc loop worker vector reduction(.or.:l) collapse(2)
+firstdo: do i = 1, 3
+ do j = 4, 6
+ do k = 5, 7
+ if (a(i, j, k) .ne. (i + j + k)) l = .true.
+ end do
+ end do
+ end do firstdo
+ !$acc end parallel
+ if (l) call abort
+ end subroutine test1
+
+ subroutine test2
+ integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+ a = 0
+ !$acc parallel
+ ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+ ! execution mode into something like gang-single.
+ !$acc loop gang(static:1) collapse(1)
+ do 115 k=1,3
+ !$acc loop collapse(2)
+ dokk: do kk=1,3
+ do kkk=1,3
+ a(k,kk,kkk) = 1
+ enddo
+ enddo dokk
+115 continue
+ !$acc loop gang(static:1) collapse(1)
+ do k=1,3
+ if (any(a(k,1:3,1:3).ne.1)) call abort
+ enddo
+ ! Use "gang(static:1)" here and below to effectively turn gang-redundant
+ ! execution mode into something like gang-single.
+ !$acc loop gang(static:1) collapse(1)
+ dol: do 120 l=1,3
+ !$acc loop collapse(2)
+ doll: do ll=1,3
+ do lll=1,3
+ a(l,ll,lll) = 2
+ enddo
+ enddo doll
+120 end do dol
+ !$acc loop gang(static:1) collapse(1)
+ do l=1,3
+ if (any(a(l,1:3,1:3).ne.2)) call abort
+ enddo
+ !$acc end parallel
+ end subroutine test2
+
+end program collapse2
--- /dev/null
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program collapse3
+ integer :: p1, p2, p3, p4, p5, p6, p7, p8, p9
+ p1 = 2
+ p2 = 6
+ p3 = -2
+ p4 = 4
+ p5 = 13
+ p6 = 18
+ p7 = 1
+ p8 = 1
+ p9 = 1
+ call test1
+ call test2 (p1, p2, p3, p4, p5, p6)
+ call test3 (p1, p2, p3, p4, p5, p6, p7, p8, p9)
+ call test4
+contains
+ subroutine test1
+ integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+ !$acc parallel
+ !$acc loop collapse(3)
+ do 115 k=1,3
+dokk: do kk=1,3
+ do kkk=1,3
+ a(k,kk,kkk) = 1
+ enddo
+ enddo dokk
+115 continue
+ !$acc end parallel
+ if (any(a(1:3,1:3,1:3).ne.1)) call abort
+ !$acc parallel
+ !$acc loop collapse(3)
+dol: do 120 l=1,3
+doll: do ll=1,3
+ do lll=1,3
+ a(l,ll,lll) = 2
+ enddo
+ enddo doll
+120 end do dol
+ !$acc end parallel
+ if (any(a(1:3,1:3,1:3).ne.2)) call abort
+ end subroutine test1
+
+ subroutine test2(v1, v2, v3, v4, v5, v6)
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ !$acc parallel reduction (.or.:l)
+ !$acc loop reduction (.or.:l) collapse (3)
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.l) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+ end subroutine test2
+
+ subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ !$acc parallel reduction (.or.:l)
+ !$acc loop reduction (.or.:l) collapse (3)
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.l) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+ end subroutine test3
+
+ subroutine test4
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ v1 = p1
+ v2 = p2
+ v3 = p3
+ v4 = p4
+ v5 = p5
+ v6 = p6
+ v7 = p7
+ v8 = p8
+ v9 = p9
+ !$acc parallel reduction (.or.:l)
+ !$acc loop reduction (.or.:l) collapse (3)
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+ end subroutine test4
+
+end program collapse3
--- /dev/null
+! Exercise nested function decomposition, gcc/tree-nested.c.
+
+! { dg-do run }
+
+program sub_collapse_3
+ call test1
+ call test2 (2, 6, -2, 4, 13, 18)
+ call test3 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+ call test4
+ call test5 (2, 6, -2, 4, 13, 18)
+ call test6 (2, 6, -2, 4, 13, 18, 1, 1, 1)
+contains
+ subroutine test1
+ integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+ !$acc parallel
+ !$acc loop collapse(3)
+ do 115 k=1,3
+dokk: do kk=1,3
+ do kkk=1,3
+ a(k,kk,kkk) = 1
+ enddo
+ enddo dokk
+115 continue
+ !$acc end parallel
+ if (any(a(1:3,1:3,1:3).ne.1)) call abort
+ !$acc parallel
+ !$acc loop collapse(3)
+dol: do 120 l=1,3
+doll: do ll=1,3
+ do lll=1,3
+ a(l,ll,lll) = 2
+ enddo
+ enddo doll
+120 end do dol
+ !$acc end parallel
+ if (any(a(1:3,1:3,1:3).ne.2)) call abort
+ end subroutine test1
+
+ subroutine test2(v1, v2, v3, v4, v5, v6)
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+ !$acc loop reduction (.or.:l) collapse (3)
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.l) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+ end subroutine test2
+
+ subroutine test3(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+ !$acc loop reduction (.or.:l) collapse (3)
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.l) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+ end subroutine test3
+
+ subroutine test4
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ v1 = 2
+ v2 = 6
+ v3 = -2
+ v4 = 4
+ v5 = 13
+ v6 = 18
+ v7 = 1
+ v8 = 1
+ v9 = 1
+ !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+ !$acc loop reduction (.or.:l) collapse (3)
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+ end subroutine test4
+
+ subroutine test5(v1, v2, v3, v4, v5, v6)
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6) reduction (.or.:l)
+ !$acc loop reduction (.or.:l) collapse (3)
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ do i = v1, v2
+ do j = v3, v4
+ do k = v5, v6
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+ end subroutine test5
+
+ subroutine test6(v1, v2, v3, v4, v5, v6, v7, v8, v9)
+ integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+ integer :: v1, v2, v3, v4, v5, v6, v7, v8, v9
+ logical :: l, r
+ l = .false.
+ r = .false.
+ a(:, :, :) = 0
+ b(:, :, :) = 0
+ !$acc parallel pcopyin (v1, v2, v3, v4, v5, v6, v7, v8, v9) reduction (.or.:l)
+ !$acc loop reduction (.or.:l) collapse (3)
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ l = l.or.k.lt.13.or.k.gt.18
+ if (.not.l) a(i, j, k) = a(i, j, k) + 1
+ m = i * 100 + j * 10 + k
+ end do
+ end do
+ end do
+ !$acc end parallel
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+ r = r.or.k.lt.13.or.k.gt.18
+ if (.not.r) b(i, j, k) = b(i, j, k) + 1
+ end do
+ end do
+ end do
+ if (l .neqv. r) call abort
+ do i = v1, v2, v7
+ do j = v3, v4, v8
+ do k = v5, v6, v9
+ if (a(i, j, k) .ne. b(i, j, k)) call abort
+ end do
+ end do
+ end do
+ end subroutine test6
+
+end program sub_collapse_3