Add some test-cases that set vector length using -fopenacc-dim.
2019-01-12 Tom de Vries <tdevries@suse.de>
* testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test.
* testsuite/libgomp.oacc-fortran/gemm-2.f90: New test.
From-SVN: r267897
+2019-01-12 Tom de Vries <tdevries@suse.de>
+
+ * testsuite/libgomp.oacc-c-c++-common/pr85486-2.c: New test.
+ * testsuite/libgomp.oacc-c-c++-common/vector-length-128-2.c: New test.
+ * testsuite/libgomp.oacc-c-c++-common/vector-length-128-5.c: New test.
+ * testsuite/libgomp.oacc-fortran/gemm-2.f90: New test.
+
2019-01-12 Tom de Vries <tdevries@suse.de>
* plugin/plugin-nvptx.c (nvptx_exec): Update error message.
--- /dev/null
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=::128" } */
+
+/* Minimized from ref-1.C. */
+
+#include <stdio.h>
+/* Add INC to each of the N ints starting at PTR.  Declared as an OpenACC
+   vector routine and kept out of line (noinline, noclone); the loop is
+   marked as an OpenACC vector loop.  */
+#pragma acc routine vector
+void __attribute__((noinline, noclone))
+Vector (int *ptr, int n, const int inc)
+{
+ #pragma acc loop vector
+ for (unsigned ix = 0; ix < n; ix++)
+ ptr[ix] += inc;
+}
+/* Fill an M x N array on the host, let the offloaded Vector routine add a
+   constant to every element, then verify each element on the host.
+   Returns 1 after printing the mismatches, or 0 when all elements match.  */
+int
+main (void)
+{
+ const int n = 32, m=32;
+
+ int ary[m][n];
+ unsigned ix, iy;
+
+ for (ix = m; ix--;)
+ for (iy = n; iy--;)
+ ary[ix][iy] = (1 << 16) + (ix << 8) + iy;
+
+ int err = 0;
+ /* Adding (1 << 24) - (1 << 16) replaces the initial (1 << 16) term with
+    (1 << 24), the value the check below expects.  */
+#pragma acc parallel copy (ary)
+ {
+ Vector (&ary[0][0], m * n, (1 << 24) - (1 << 16));
+ }
+ /* Compare each element against its expected value, counting failures.  */
+ for (ix = m; ix--;)
+ for (iy = n; iy--;)
+ if (ary[ix][iy] != ((1 << 24) + (ix << 8) + iy))
+ {
+ printf ("ary[%u][%u] = %x expected %x\n",
+ ix, iy, ary[ix][iy], ((1 << 24) + (ix << 8) + iy));
+ err++;
+ }
+
+ if (err)
+ {
+ printf ("%d failed\n", err);
+ return 1;
+ }
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=::128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024 /* Number of elements in each array.  */
+
+unsigned int a[N]; /* First input, filled on the host in main.  */
+unsigned int b[N]; /* Second input, filled on the host in main.  */
+unsigned int c[N]; /* Output, written inside the OpenACC parallel region.  */
+unsigned int n = N; /* Run-time copy of N, used as the loop bound in main.  */
+/* Fill A and B on the host, compute C = A + B element-wise in an OpenACC
+   parallel region using a vector loop, and abort if any element of C
+   differs from the sum recomputed on the host.  The directives at the top
+   and bottom of this file request vector length 128 and check that the
+   region is compiled and launched with 1 gang, 1 worker, 128 vectors.  */
+int
+main (void)
+{
+ for (unsigned int i = 0; i < n; ++i)
+ {
+ a[i] = i % 3;
+ b[i] = i % 5;
+ }
+
+#pragma acc parallel copyin (a,b) copyout (c)
+ {
+#pragma acc loop vector
+ for (unsigned int i = 0; i < n; i++)
+ c[i] = a[i] + b[i];
+ }
+
+ for (unsigned int i = 0; i < n; ++i)
+ if (c[i] != (i % 3) + (i % 5))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */
--- /dev/null
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* { dg-additional-options "-fopenacc-dim=:2:128" } */
+/* { dg-additional-options "-foffload=-fdump-tree-oaccdevlow" } */
+/* { dg-set-target-env-var "GOMP_DEBUG" "1" } */
+
+#include <stdlib.h>
+
+#define N 1024 /* Number of elements in each array.  */
+
+unsigned int a[N]; /* First input, filled on the host in main.  */
+unsigned int b[N]; /* Second input, filled on the host in main.  */
+unsigned int c[N]; /* Output, written inside the OpenACC parallel region.  */
+unsigned int n = N; /* Run-time copy of N, read by the loops in main.  */
+/* Fill A and B on the host, compute C = A + B in an OpenACC parallel
+   region, and abort if any element of C differs from the sum recomputed
+   on the host.  Each of the 4 iterations of the worker loop covers one
+   chunk of N / 4 elements, which the inner vector loop walks.  The
+   directives at the top and bottom of this file request 2 workers and
+   vector length 128 and check that the region is compiled and launched
+   with 1 gang, 2 workers, 128 vectors.  */
+int
+main (void)
+{
+ for (unsigned int i = 0; i < n; ++i)
+ {
+ a[i] = i % 3;
+ b[i] = i % 5;
+ }
+
+#pragma acc parallel copyin (a,b) copyout (c)
+ {
+#pragma acc loop worker
+ for (unsigned int i = 0; i < 4; i++)
+#pragma acc loop vector
+ for (unsigned int j = 0; j < n / 4; j++)
+ c[(i * N / 4) + j] = a[(i * N / 4) + j] + b[(i * N / 4) + j];
+ }
+
+ for (unsigned int i = 0; i < n; ++i)
+ if (c[i] != (i % 3) + (i % 5))
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 2, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=2, vectors=128" } */
--- /dev/null
+! Exercise three levels of parallelism using SGEMM from BLAS.
+
+! { dg-do run }
+! { dg-additional-options "-fopenacc-dim=::128" }
+! Compute c(i,j) = alpha * sum_l a(l,i)*b(l,j) + beta * c(i,j) in an OpenACC loop nest.
+! Implicitly set vector_length to 128 using -fopenacc-dim.
+subroutine openacc_sgemm (m, n, k, alpha, a, b, beta, c)
+ integer :: m, n, k
+ real :: alpha, beta
+ real :: a(k,*), b(k,*), c(m,*)
+ ! i, j and l are the loop indices; temp accumulates the sum for one c(i,j).
+ integer :: i, j, l
+ real :: temp
+
+ !$acc parallel loop copy(c(1:m,1:n)) copyin(a(1:k,1:m),b(1:k,1:n)) firstprivate (temp)
+ do j = 1, n
+ !$acc loop
+ do i = 1, m
+ temp = 0.0 ! reset the accumulator for this (i,j)
+ !$acc loop reduction(+:temp)
+ do l = 1, k
+ temp = temp + a(l,i)*b(l,j)
+ end do
+ if(beta == 0.0) then ! when beta is exactly zero, c(i,j) is overwritten without being read
+ c(i,j) = alpha*temp
+ else
+ c(i,j) = alpha*temp + beta*c(i,j)
+ end if
+ end do
+ end do
+end subroutine openacc_sgemm
+! Same computation as openacc_sgemm, written without any OpenACC directives.
+subroutine host_sgemm (m, n, k, alpha, a, b, beta, c)
+ integer :: m, n, k
+ real :: alpha, beta
+ real :: a(k,*), b(k,*), c(m,*)
+ ! i, j and l are the loop indices; temp accumulates the sum for one c(i,j).
+ integer :: i, j, l
+ real :: temp
+
+ do j = 1, n
+ do i = 1, m
+ temp = 0.0 ! reset the accumulator for this (i,j)
+ do l = 1, k
+ temp = temp + a(l,i)*b(l,j)
+ end do
+ if(beta == 0.0) then ! when beta is exactly zero, c(i,j) is overwritten without being read
+ c(i,j) = alpha*temp
+ else
+ c(i,j) = alpha*temp + beta*c(i,j)
+ end if
+ end do
+ end do
+end subroutine host_sgemm
+! Run openacc_sgemm and host_sgemm on identical inputs; abort if any result element differs.
+program main
+ integer, parameter :: M = 100, N = 50, K = 2000
+ real :: a(K, M), b(K, N), c(M, N), d (M, N), e (M, N)
+ real alpha, beta
+ integer i, j
+ ! Constant-valued inputs; c and e both start at zero so the two routines see the same data.
+ a(:,:) = 1.0
+ b(:,:) = 0.25
+
+ c(:,:) = 0.0
+ d(:,:) = 0.0 ! d is not referenced again in this program
+ e(:,:) = 0.0
+
+ alpha = 1.05
+ beta = 1.25
+
+ call openacc_sgemm (M, N, K, alpha, a, b, beta, c)
+ call host_sgemm (M, N, K, alpha, a, b, beta, e)
+ ! The result in c is compared with the host result in e for exact equality.
+ do i = 1, m
+ do j = 1, n
+ if (c(i,j) /= e(i,j)) call abort
+ end do
+ end do
+end program main