From efb56ae82bd871b64861e991466ce616688a88e9 Mon Sep 17 00:00:00 2001
From: Tom de Vries
Date: Sat, 12 Jan 2019 22:19:31 +0000
Subject: [PATCH] [nvptx] Enable setting vector length using -fopenacc-dim -- testcases

Add some test-cases that set vector length using -fopenacc-dim.

2019-01-12  Tom de Vries

	* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test.
	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test.
	* testsuite/libgomp.oacc-fortran/gemm-2.f90: New test.

From-SVN: r267897
---
 libgomp/ChangeLog                             |  7 ++
 .../libgomp.oacc-c-c++-common/pr85486-2.c     | 52 ++++++++++++
 .../vector-length-128-2.c                     | 39 +++++++++
 .../vector-length-128-5.c                     | 41 ++++++++++
 .../testsuite/libgomp.oacc-fortran/gemm-2.f90 | 80 +++++++++++++++++++
 5 files changed, 219 insertions(+)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
 create mode 100644 libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90

diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 00210529a55..c279edfe2f3 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,10 @@
+2019-01-12  Tom de Vries
+
+	* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test.
+	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test.
+	* testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test.
+	* testsuite/libgomp.oacc-fortran/gemm-2.f90: New test.
+
 2019-01-12  Tom de Vries
 
 	* plugin/plugin-nvptx.c (nvptx_exec): Update error message.
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
new file mode 100644
index 00000000000..f6ca263166d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr85486-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=::128" } */
+
+/* Minimized from ref-1.C.  */
+
+#include <stdio.h>
+
+#pragma acc routine vector
+void __attribute__((noinline, noclone))
+Vector (int *ptr, int n, const int inc)
+{
+  #pragma acc loop vector
+  for (unsigned ix = 0; ix < n; ix++)
+    ptr[ix] += inc;
+}
+
+int
+main (void)
+{
+  const int n = 32, m=32;
+
+  int ary[m][n];
+  unsigned ix, iy;
+
+  for (ix = m; ix--;)
+    for (iy = n; iy--;)
+      ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
+
+  int err = 0;
+
+#pragma acc parallel copy (ary)
+  {
+    Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
+  }
+
+  for (ix = m; ix--;)
+    for (iy = n; iy--;)
+      if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
+        {
+          printf ("ary[%u][%u] = %x expected %x\n",
+                  ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
+          err++;
+        }
+
+  if (err)
+    {
+      printf ("%d failed\n", err);
+      return 1;
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
new file mode 100644
index 00000000000..8b5b2a4a92d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=::128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+    {
+      a[i] = i % 3;
+      b[i] = i % 5;
+    }
+
+#pragma acc parallel copyin (a,b) copyout (c)
+  {
+#pragma acc loop vector
+    for (unsigned int i = 0; i < n; i++)
+      c[i] = a[i] + b[i];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+    if (c[i] != (i % 3) + (i % 5))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
new file mode 100644
index 00000000000..e60f1c28db4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=:2:128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024
+
+unsigned int a[N];
+unsigned int b[N];
+unsigned int c[N];
+unsigned int n = N;
+
+int
+main (void)
+{
+  for (unsigned int i = 0; i < n; ++i)
+    {
+      a[i] = i % 3;
+      b[i] = i % 5;
+    }
+
+#pragma acc parallel copyin (a,b) copyout (c)
+  {
+#pragma acc loop worker
+    for (unsigned int i = 0; i < 4; i++)
+#pragma acc loop vector
+      for (unsigned int j = 0; j < n / 4; j++)
+        c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+  }
+
+  for (unsigned int i = 0; i < n; ++i)
+    if (c[i] != (i % 3) + (i % 5))
+      abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90
new file mode 100644
index 00000000000..fe108732a5f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gemm-2.f90
@@ -0,0 +1,80 @@
+! Exercise three levels of parallelism using SGEMM from BLAS.
+
+! { dg-do run }
+! { dg-additional-options "-fopenacc-dim=::128" }
+
+! Implicitly set vector_length to 128 using -fopenacc-dim.
+subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c)
+  integer :: m, n, k
+  real :: alpha, beta
+  real :: a(k,*), b(k,*), c(m,*)
+
+  integer :: i, j, l
+  real :: temp
+
+  !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) firstprivate (temp)
+  do j = 1, n
+     !$acc loop
+     do i = 1, m
+        temp = 0.0
+        !$acc loop reduction(+:temp)
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)
+        end do
+        if(beta == 0.0) then
+           c(i,j) = alpha*temp
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine openacc_sgemm
+
+subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)
+  integer :: m, n, k
+  real :: alpha, beta
+  real :: a(k,*), b(k,*), c(m,*)
+
+  integer :: i, j, l
+  real :: temp
+
+  do j = 1, n
+     do i = 1, m
+        temp = 0.0
+        do l = 1, k
+           temp = temp + a(l,i)*b(l,j)
+        end do
+        if(beta == 0.0) then
+           c(i,j) = alpha*temp
+        else
+           c(i,j) = alpha*temp + beta*c(i,j)
+        end if
+     end do
+  end do
+end subroutine host_sgemm
+
+program main
+  integer, parameter :: M = 100, N = 50, K = 2000
+  real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N)
+  real alpha, beta
+  integer i, j
+
+  a(:,:) = 1.0
+  b(:,:) = 0.25
+
+  c(:,:) = 0.0
+  d(:,:) = 0.0
+  e(:,:) = 0.0
+
+  alpha = 1.05
+  beta = 1.25
+
+  call openacc_sgemm (M, N, K, alpha, a, b, beta, c)
+  call host_sgemm (M, N, K, alpha, a, b, beta, e)
+
+  do i = 1, m
+     do j = 1, n
+        if (c(i,j) /= e(i,j)) call abort
+     end do
+  end do
+end program main
-- 
2.30.2