Update OpenACC tests for amdgcn

author Andrew Stubbs <ams@codesourcery.com>

Thu, 14 Nov 2019 16:16:04 +0000 (16:16 +0000)

committer Andrew Stubbs <ams@codesourcery.com>

Mon, 20 Jan 2020 16:51:06 +0000 (16:51 +0000)
author Andrew Stubbs <ams@codesourcery.com>
Thu, 14 Nov 2019 16:16:04 +0000 (16:16 +0000)
committer Andrew Stubbs <ams@codesourcery.com>
Mon, 20 Jan 2020 16:51:06 +0000 (16:51 +0000)
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog

index 24cbe04bd2f35d484ca55254bacec6cb36082ffc..fa6aeed4ec4ccdda56a3a84a90d2fec64bd738d5 100644 (file)
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,33 @@
+2020-01-20  Andrew Stubbs  <ams@codesourcery.com>
+
+       * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Skip test on gcn.
+       * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (main):
+       Adjust test dimensions for amdgcn.
+       * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (main): Adjust
+       gang/worker/vector expectations dynamically.
+       * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+       (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+       (acc_gang): Recognise acc_device_radeon.
+       (acc_worker): Likewise.
+       (acc_vector): Likewise.
+       (main): Set expectations for amdgcn.
+       * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+       (main): Adjust gang/worker/vector expectations dynamically.
+       * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (main): Likewise.
+       * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Set expectations
+       for amdgcn.
+
  2020-01-17  Andrew Stubbs  <ams@codesourcery.com>
  
         * config/accel/openacc.f90 (openacc_kinds): Rename acc_device_gcn to
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c

index 34bc57e51f52a7ce761d7bfeb7941c36b92ec738..0c9ae957460103aaa8f283462b0bc66b4090256b 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
@@ -1,3 +1,6 @@
+/* AMD GCN does not use 32-lane vectors.
+   { dg-skip-if "unsuitable dimensions" { openacc_amdgcn_accel_selected } { "*" } { "" } } */
+
  /* { dg-additional-options "-fopenacc-dim=32" } */
  
  #include <stdio.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c

index 04387d36174959d90df8b15410154f16b0906e28..30f0539707f5d7de95cd916601770f79de878eac 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
@@ -128,5 +128,14 @@ int test_1 (int gp, int wp, int vp)
  
  int main ()
  {
+#ifdef ACC_DEVICE_TYPE_gcn
+  /* AMD GCN uses the autovectorizer for the vector dimension: the use
+     of a function call in vector-partitioned code in this test is not
+     currently supported.  */
+  /* AMD GCN does not currently support multiple workers.  This should be
+     set to 16 when that changes.  */
+  return test_1 (16, 1, 1);
+#else
    return test_1 (16, 16, 32);
+#endif
  }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c

index 766e5782b463cdf7bb1690739eed6e2b0c810d82..5c843012061877e5002990712e65deac7a903738 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
@@ -9,11 +9,13 @@ int main ()
    int ix;
    int exit = 0;
    int ondev = 0;
+  int gangsize, workersize, vectorsize;
  
    for (ix = 0; ix < N;ix++)
      ary[ix] = -1;
    
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+           copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
    {
  #pragma acc loop gang worker vector
      for (unsigned ix = 0; ix < N; ix++)
@@ -32,6 +34,10 @@ int main ()
         else
           ary[ix] = ix;
        }
+
+    gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -39,11 +45,12 @@ int main ()
        int expected = ix;
        if(ondev)
         {
-         int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+         int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+                          / (gangsize * workersize * vectorsize);
           
-         int g = ix / (chunk_size * 32 * 32);
-         int w = ix / 32 % 32;
-         int v = ix % 32;
+         int g = ix / (chunk_size * workersize * vectorsize);
+         int w = (ix / vectorsize) % workersize;
+         int v = ix % vectorsize;
  
           expected = (g << 16) | (w << 8) | v;
         }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c

index 0bec6e19510a5ceff8d7fee94634f27f35517ef6..9c4a85f7b16bf03a3c6da3b561eec01f068202eb 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
@@ -8,8 +8,10 @@ int main ()
    int ix;
    int ondev = 0;
    int t = 0, h = 0;
-  
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev)
+  int gangsize, workersize, vectorsize;
+
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+       copy(ondev) copyout(gangsize, workersize, vectorsize)
    {
  #pragma acc loop gang worker vector reduction(+:t)
      for (unsigned ix = 0; ix < N; ix++)
@@ -28,18 +30,22 @@ int main ()
           }
         t += val;
        }
+    gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
    }
  
    for (ix = 0; ix < N; ix++)
      {
        int val = ix;
-      if(ondev)
+      if (ondev)
         {
-         int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+         int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+                          / (gangsize * workersize * vectorsize);
           
-         int g = ix / (chunk_size * 32 * 32);
-         int w = ix / 32 % 32;
-         int v = ix % 32;
+         int g = ix / (chunk_size * vectorsize * workersize);
+         int w = ix / vectorsize % workersize;
+         int v = ix % vectorsize;
  
           val = (g << 16) | (w << 8) | v;
         }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c

index da4921d15f98c882d0dcf3725f30e96e19e5a0bb..1173c1f57bb81619e825a4477281fe304d51011e 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
@@ -9,8 +9,9 @@ int main ()
    int ix;
    int ondev = 0;
    int t = 0,  h = 0;
+  int vectorsize;
  
-#pragma acc parallel vector_length(32) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ondev) copyout(vectorsize)
    {
  #pragma acc loop vector reduction (+:t)
      for (unsigned ix = 0; ix < N; ix++)
@@ -29,6 +30,7 @@ int main ()
           }
         t += val;
        }
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -38,7 +40,7 @@ int main ()
         {
           int g = 0;
           int w = 0;
-         int v = ix % 32;
+         int v = ix % vectorsize;
  
           val = (g << 16) | (w << 8) | v;
         }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c

index 15e2bc2f83bf898aafded1f57abd359326850eca..84c2296a7b1238b5d39df8fdb12fb3731bac74c9 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
@@ -9,8 +9,9 @@ int main ()
    int ix;
    int ondev = 0;
    int q = 0,  h = 0;
+  int vectorsize;
  
-#pragma acc parallel vector_length(32) copy(q) copy(ondev)
+#pragma acc parallel vector_length(32) copy(q) copy(ondev) copyout(vectorsize)
    {
      int t = q;
      
@@ -32,6 +33,7 @@ int main ()
         t += val;
        }
      q = t;
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -41,7 +43,7 @@ int main ()
         {
           int g = 0;
           int w = 0;
-         int v = ix % 32;
+         int v = ix % vectorsize;
  
           val = (g << 16) | (w << 8) | v;
         }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c

index 6bbd04fffeaa99e2d8319a3a225d2e0f4115cd98..648f89e166806745cedd70af9297b623b42b6874 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
@@ -8,8 +8,10 @@ int main ()
    int ix;
    int ondev = 0;
    int t = 0,  h = 0;
+  int workersize;
  
-#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
+           copyout(workersize)
    {
  #pragma acc loop worker reduction(+:t)
      for (unsigned ix = 0; ix < N; ix++)
@@ -28,6 +30,7 @@ int main ()
           }
         t += val;
        }
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -36,7 +39,7 @@ int main ()
        if(ondev)
         {
           int g = 0;
-         int w = ix % 32;
+         int w = ix % workersize;
           int v = 0;
  
           val = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c

index c63a5d4f808b51122e9c4ece51ddc2a828d65f4e..f9fcf3703af9c8a4f198dc8ec9b6e8aac6c4ab9c 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
@@ -8,8 +8,10 @@ int main ()
    int ix;
    int ondev = 0;
    int q = 0,  h = 0;
+  int workersize;
  
-#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) \
+           copyout(workersize)
    {
      int t = q;
      
@@ -31,6 +33,7 @@ int main ()
         t += val;
        }
      q = t;
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -39,7 +42,7 @@ int main ()
        if(ondev)
         {
           int g = 0;
-         int w = ix % 32;
+         int w = ix % workersize;
           int v = 0;
  
           val = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c

index 71d3969f7b630effc425d5c04689a347d4197a17..c360ad11e7cbe025577ce41ba92b98fbae7a9e39 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
@@ -8,8 +8,10 @@ int main ()
    int ix;
    int ondev = 0;
    int t = 0, h = 0;
+  int workersize, vectorsize;
    
-#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
+           copyout(workersize, vectorsize)
    {
  #pragma acc loop worker vector reduction (+:t)
      for (unsigned ix = 0; ix < N; ix++)
@@ -28,6 +30,8 @@ int main ()
           }
         t += val;
        }
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -36,8 +40,8 @@ int main ()
        if(ondev)
         {
           int g = 0;
-         int w = (ix / 32) % 32;
-         int v = ix % 32;
+         int w = (ix / vectorsize) % workersize;
+         int v = ix % vectorsize;
  
           val = (g << 16) | (w << 8) | v;
         }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c

index 6010cd2498a6ec1423fcee49a9344e404629fa0c..8c858f305633d2f2cdf27d34ae50bf5750a9307d 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
@@ -9,11 +9,13 @@ int main ()
    int ix;
    int exit = 0;
    int ondev = 0;
+  int vectorsize;
  
    for (ix = 0; ix < N;ix++)
      ary[ix] = -1;
    
-#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
+           copyout(vectorsize)
    {
  #pragma acc loop vector
      for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,7 @@ int main ()
         else
           ary[ix] = ix;
        }
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -40,7 +43,7 @@ int main ()
         {
           int g = 0;
           int w = 0;
-         int v = ix % 32;
+         int v = ix % vectorsize;
  
           expected = (g << 16) | (w << 8) | v;
         }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c

index fa6fb9164e6365a082672fd45af5608152de511a..5fe486f50a158a2aafcb8e4f2288a3e12eda9417 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
@@ -9,11 +9,13 @@ int main ()
    int ix;
    int exit = 0;
    int ondev = 0;
+  int workersize;
  
    for (ix = 0; ix < N;ix++)
      ary[ix] = -1;
    
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+           copyout(workersize)
    {
  #pragma acc loop worker
      for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,7 @@ int main ()
         else
           ary[ix] = ix;
        }
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -39,7 +42,7 @@ int main ()
        if(ondev)
         {
           int g = 0;
-         int w = ix % 32;
+         int w = ix % workersize;
           int v = 0;
  
           expected = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c

index cd4cc994b826493e7c0ae19c52a4b42f088a5abd..fd4e4cf5ea9c69ec7d8ef8e6a312a7e913989378 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
@@ -9,11 +9,13 @@ int main ()
    int ix;
    int exit = 0;
    int ondev = 0;
+  int workersize, vectorsize;
  
    for (ix = 0; ix < N;ix++)
      ary[ix] = -1;
    
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+           copyout(workersize, vectorsize)
    {
  #pragma acc loop worker vector
      for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,8 @@ int main ()
         else
           ary[ix] = ix;
        }
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -39,8 +43,8 @@ int main ()
        if(ondev)
         {
           int g = 0;
-         int w = (ix / 32) % 32;
-         int v = ix % 32;
+         int w = (ix / vectorsize) % workersize;
+         int v = ix % vectorsize;
  
           expected = (g << 16) | (w << 8) | v;
         }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c

index a5edfc6ca1642799f21e80dcb8730fb701c32392..cc4c738c1db248e7a23df30354398945a1a4238d 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -14,7 +14,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
  {
    if (acc_on_device ((int) acc_device_host))
      return 0;
-  else if (acc_on_device ((int) acc_device_nvidia))
+  else if (acc_on_device ((int) acc_device_nvidia)
+          || acc_on_device ((int) acc_device_radeon))
      return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
    else
      __builtin_abort ();
@@ -25,7 +26,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
  {
    if (acc_on_device ((int) acc_device_host))
      return 0;
-  else if (acc_on_device ((int) acc_device_nvidia))
+  else if (acc_on_device ((int) acc_device_nvidia)
+          || acc_on_device ((int) acc_device_radeon))
      return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
    else
      __builtin_abort ();
@@ -36,7 +38,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
  {
    if (acc_on_device ((int) acc_device_host))
      return 0;
-  else if (acc_on_device ((int) acc_device_nvidia))
+  else if (acc_on_device ((int) acc_device_nvidia)
+          || acc_on_device ((int) acc_device_radeon))
      return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
    else
      __builtin_abort ();
@@ -282,6 +285,12 @@ int main ()
           /* The GCC nvptx back end enforces num_workers (32).  */
           workers_actual = 32;
         }
+      else if (acc_on_device (acc_device_radeon))
+       {
+         /* The GCC GCN back end is limited to num_workers (16).
+            Temporarily set this to 1 until multiple workers are permitted. */
+         workers_actual = 1; // 16;
+       }
        else
         __builtin_abort ();
  #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -328,6 +337,11 @@ int main ()
           /* We're actually executing with num_workers (32).  */
           /* workers_actual = 32; */
         }
+      else if (acc_on_device (acc_device_radeon))
+       {
+         /* The GCC GCN back end is limited to num_workers (16).  */
+         workers_actual = 16;
+       }
        else
         __builtin_abort ();
  #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -367,6 +381,11 @@ int main ()
           /* The GCC nvptx back end enforces vector_length (32).  */
           vectors_actual = 1024;
         }
+      else if (acc_on_device (acc_device_radeon))
+       {
+         /* The GCC GCN back end enforces vector_length (1): autovectorize. */
+         vectors_actual = 1;
+       }
        else
         __builtin_abort ();
  #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -407,6 +426,13 @@ int main ()
           /* The GCC nvptx back end enforces vector_length (32).  */
           vectors_actual = 32;
         }
+      else if (acc_on_device (acc_device_radeon))
+       {
+         /* Because of the way vectors are implemented for GCN, a vector loop
+            containing a seq routine call will not vectorize calls to that
+            routine.  Hence, we'll only get one "vector".  */
+         vectors_actual = 1;
+       }
        else
         __builtin_abort ();
  #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -433,6 +459,9 @@ int main ()
         in the following case.  So, limit ourselves here.  */
      if (acc_get_device_type () == acc_device_nvidia)
        gangs = 3;
+    /* Similar appears to be true for GCN.  */
+    if (acc_get_device_type () == acc_device_radeon)
+      gangs = 3;
      int gangs_actual = gangs;
  #define WORKERS 3
      int workers_actual = WORKERS;
@@ -459,6 +488,13 @@ int main ()
           /* The GCC nvptx back end enforces vector_length (32).  */
           vectors_actual = 32;
         }
+      else if (acc_on_device (acc_device_radeon))
+       {
+         /* Temporary setting, until multiple workers are permitted.  */
+         workers_actual = 1;
+         /* See above comments about GCN vectors_actual.  */
+         vectors_actual = 1;
+       }
        else
         __builtin_abort ();
  #pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c

index a97e046b687f8bccb03ee6c21f8c926c5f3c498c..da13d84908a82ef18b5e85896b5b96c7ca311094 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
@@ -30,14 +30,18 @@ int main ()
    int ix;
    int exit = 0;
    int ondev = 0;
+  int gangsize, workersize, vectorsize;
  
    for (ix = 0; ix < N;ix++)
      ary[ix] = -1;
    
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
    {
      ondev = acc_on_device (acc_device_not_host);
      gang (ary);
+    gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -45,11 +49,12 @@ int main ()
        int expected = ix;
        if(ondev)
         {
-         int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+         int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+                          / (gangsize * workersize * vectorsize);
           
-         int g = ix / (chunk_size * 32 * 32);
-         int w = ix / 32 % 32;
-         int v = ix % 32;
+         int g = ix / (chunk_size * vectorsize * workersize);
+         int w = (ix / vectorsize) % workersize;
+         int v = ix % vectorsize;
  
           expected = (g << 16) | (w << 8) | v;
         }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c

index b1e3e3a596af9af115a7a9e184e62fad71ff807b..dd7bb6cdcd1ec2a048a2bf1780d97b0c4e917c82 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
@@ -30,14 +30,17 @@ int main ()
    int ix;
    int exit = 0;
    int ondev = 0;
+  int vectorsize;
  
    for (ix = 0; ix < N;ix++)
      ary[ix] = -1;
    
-#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
+           copyout(vectorsize)
    {
      ondev = acc_on_device (acc_device_not_host);
      vector (ary);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -47,7 +50,7 @@ int main ()
         {
           int g = 0;
           int w = 0;
-         int v = ix % 32;
+         int v = ix % vectorsize;
  
           expected = (g << 16) | (w << 8) | v;
         }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c

index 81f1e0361c0d201e6d0fa5ce99fe30d3ec8579a8..acd9884cbd61f61bc9363cc3de2ae429034765b0 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
@@ -30,14 +30,17 @@ int main ()
    int ix;
    int exit = 0;
    int ondev = 0;
+  int workersize;
  
    for (ix = 0; ix < N;ix++)
      ary[ix] = -1;
    
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+           copyout(workersize)
    {
      ondev = acc_on_device (acc_device_not_host);
      worker (ary);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -46,7 +49,7 @@ int main ()
        if(ondev)
         {
           int g = 0;
-         int w = ix % 32;
+         int w = ix % workersize;
           int v = 0;
  
           expected = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c

index 23dbc1ae401474be1db016559d1792609c9cead4..73696e4e59a3901248216784be70583655fb24db 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
@@ -30,14 +30,18 @@ int main ()
    int ix;
    int exit = 0;
    int ondev = 0;
+  int workersize, vectorsize;
  
    for (ix = 0; ix < N;ix++)
      ary[ix] = -1;
    
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+           copyout(workersize, vectorsize)
    {
      ondev = acc_on_device (acc_device_not_host);
      worker (ary);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
    }
  
    for (ix = 0; ix < N; ix++)
@@ -46,8 +50,8 @@ int main ()
        if(ondev)
         {
           int g = 0;
-         int w = (ix / 32) % 32;
-         int v = ix % 32;
+         int w = (ix / vectorsize) % workersize;
+         int v = ix % vectorsize;
  
           expected = (g << 16) | (w << 8) | v;
         }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c

index 886214843f14d1674c5333972e13defef6de66db..609f9f6a7da2302ce3f55af895b2e114aa20fdfd 100644 (file)
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
@@ -2,8 +2,13 @@
  #include <openacc.h>
  #include <gomp-constants.h>
  
+#ifdef ACC_DEVICE_TYPE_gcn
+#define NUM_WORKERS 16
+#define NUM_VECTORS 1
+#else
  #define NUM_WORKERS 16
  #define NUM_VECTORS 32
+#endif
  #define WIDTH 64
  #define HEIGHT 32
author	Andrew Stubbs <ams@codesourcery.com>
	Thu, 14 Nov 2019 16:16:04 +0000 (16:16 +0000)
committer	Andrew Stubbs <ams@codesourcery.com>
	Mon, 20 Jan 2020 16:51:06 +0000 (16:51 +0000)
libgomp/ChangeLog		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c		patch \| blob \| history
libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c		patch \| blob \| history