+2015-07-13 Maxim Blumenthal <maxim.blumenthal@intel.com>
+
+ * testsuite/libgomp.c++/examples-4/e.53.2.C: Renamed to...
+ * testsuite/libgomp.c++/examples-4/declare_target-2.C: ...this.
+ * testsuite/libgomp.c++/examples-4/e.51.5.C: Renamed to...
+ * testsuite/libgomp.c++/examples-4/target_data-5.C: ...this.
+ * testsuite/libgomp.c/examples-4/e.56.3.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/array_sections-3.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.56.4.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/array_sections-4.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.55.1.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/async_target-1.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.55.2.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/async_target-2.c: ...this.
+ (vec_mult_ref): Remove v1 and v2 arguments, turn them into local
+ variables.
+ (vec_mult): Likewise. Add #pragma omp taskwait.
+ (main): Adjust caller.
+ * testsuite/libgomp.c/examples-4/e.53.1.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/declare_target-1.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.53.3.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/declare_target-3.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.53.4.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/declare_target-4.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.53.5.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/declare_target-5.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.57.1.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/device-1.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.57.2.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/device-2.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.57.3.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/device-3.c: ...this.
+ * testsuite/libgomp.c/examples-4/simd-1.c: New file.
+ * testsuite/libgomp.c/examples-4/simd-2.c: New file.
+ * testsuite/libgomp.c/examples-4/simd-3.c: New file.
+ * testsuite/libgomp.c/examples-4/simd-4.c: New file.
+ * testsuite/libgomp.c/examples-4/simd-5.c: New file.
+ * testsuite/libgomp.c/examples-4/simd-6.c: New file.
+ * testsuite/libgomp.c/examples-4/simd-7.c: New file.
+ * testsuite/libgomp.c/examples-4/simd-8.c: New file.
+ * testsuite/libgomp.c/examples-4/e.50.1.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target-1.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.50.2.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target-2.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.50.3.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target-3.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.50.4.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target-4.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.50.5.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target-5.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.51.1.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target_data-1.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.51.2.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target_data-2.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.51.3.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target_data-3.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.51.4.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target_data-4.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.51.6.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target_data-6.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.51.7.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target_data-7.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.52.1.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target_update-1.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.52.2.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/target_update-2.c: ...this.
+ * testsuite/libgomp.c/examples-4/task_dep-1.c: New file.
+ * testsuite/libgomp.c/examples-4/task_dep-2.c: New file.
+ * testsuite/libgomp.c/examples-4/task_dep-3.c: New file.
+ * testsuite/libgomp.c/examples-4/task_dep-4.c: New file.
+ * testsuite/libgomp.c/examples-4/task_dep-5.c: New file.
+ * testsuite/libgomp.c/examples-4/e.54.2.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/teams-2.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.54.3.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/teams-3.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.54.4.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/teams-4.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.54.5.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/teams-5.c: ...this.
+ * testsuite/libgomp.c/examples-4/e.54.6.c: Renamed to...
+ * testsuite/libgomp.c/examples-4/teams-6.c: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.56.3.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/array_sections-3.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.56.4.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/array_sections-4.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.55.1.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/async_target-1.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.55.2.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/async_target-2.f90: ...this.
+ (vec_mult): Add !$omp taskwait.
+ * testsuite/libgomp.fortran/examples-4/e.53.1.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/declare_target-1.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.53.2.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/declare_target-2.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.53.3.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/declare_target-3.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.53.4.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/declare_target-4.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.53.5.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/declare_target-5.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.57.1.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/device-1.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.57.2.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/device-2.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.57.3.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/device-3.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/simd-1.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/simd-2.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/simd-3.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/simd-4.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/simd-5.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/simd-6.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/simd-7.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/simd-8.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/e.50.1.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target-1.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.50.2.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target-2.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.50.3.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target-3.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.50.4.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target-4.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.50.5.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target-5.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.51.1.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target_data-1.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.51.2.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target_data-2.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.51.3.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target_data-3.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.51.4.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target_data-4.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.51.5.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target_data-5.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.51.6.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target_data-6.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.51.7.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target_data-7.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.52.1.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target_update-1.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.52.2.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/target_update-2.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/task_dep-1.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/task_dep-2.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/task_dep-3.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/task_dep-4.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/task_dep-5.f90: New file.
+ * testsuite/libgomp.fortran/examples-4/e.54.2.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/teams-2.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.54.3.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/teams-3.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.54.4.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/teams-4.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.54.5.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/teams-5.f90: ...this.
+ * testsuite/libgomp.fortran/examples-4/e.54.6.f90: Renamed to...
+ * testsuite/libgomp.fortran/examples-4/teams-6.f90: ...this.
+
2015-07-10 Tom de Vries <tom@codesourcery.com>
* testsuite/libgomp.fortran/parloops-exit-first-loop-alt-2.f95: New test.
--- /dev/null
+// { dg-do run }
+// { dg-require-effective-target offload_device }
+
+#include <stdlib.h>
+
+/* Plain aggregate holding a single int.  The global varX declared further
+   down in this file has this type.  */
+struct typeX
+{
+ int a;
+};
+
+/* Class counterpart of typeX: one public int member plus an inline member
+   function.  The global varY declared further down has this type.  */
+class typeY
+{
+public:
+ /* Return the member a with its lowest bit flipped (a XOR 1).  */
+ int foo () { return a^0x01; }
+ int a;
+};
+
+/* Globals placed inside a declare target region.  main writes them from a
+   target region and reads them back with a target update directive.  */
+#pragma omp declare target
+struct typeX varX;
+class typeY varY;
+#pragma omp end declare target
+
+/* Store 100 into both declare-target globals from a target region, then
+   check that the host copies keep their zeros until an explicit
+   "target update from" brings the new values over.  */
+int main ()
+{
+  /* Host copies start out as zero.  */
+  varY.a = 0;
+  varX.a = 0;
+
+  #pragma omp target
+    {
+      varX.a = 100;
+      varY.a = 100;
+    }
+
+  /* No update has been requested yet, so both host copies must still be
+     zero.  */
+  if (!(varX.a == 0 && varY.a == 0))
+    abort ();
+
+  #pragma omp target update from(varX, varY)
+
+  /* After the update both host copies must read 100.  */
+  if (!(varX.a == 100 && varY.a == 100))
+    abort ();
+
+  return 0;
+}
+++ /dev/null
-// { dg-do run }
-
-#include <omp.h>
-
-#define EPS 0.000001
-#define N 1000
-
-extern "C" void abort (void);
-
-void init (float *a1, float *a2, int n)
-{
- int s = -1;
- for (int i = 0; i < n; i++)
- {
- a1[i] = s * 0.01;
- a2[i] = i;
- s = -s;
- }
-}
-
-void check (float *a, float *b, int n)
-{
- for (int i = 0; i < n; i++)
- if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
- abort ();
-}
-
-void vec_mult_ref (float *&p, float *&v1, float *&v2, int n)
-{
- for (int i = 0; i < n; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void vec_mult (float *&p, float *&v1, float *&v2, int n)
-{
- #pragma omp target map(to: v1[0:n], v2[:n]) map(from: p[0:n])
- #pragma omp parallel for
- for (int i = 0; i < n; i++)
- p[i] = v1[i] * v2[i];
-}
-
-int main ()
-{
- float *p = new float [N];
- float *p1 = new float [N];
- float *v1 = new float [N];
- float *v2 = new float [N];
-
- init (v1, v2, N);
-
- vec_mult_ref (p, v1, v2, N);
- vec_mult (p1, v1, v2, N);
-
- check (p, p1, N);
-
- delete [] p;
- delete [] p1;
- delete [] v1;
- delete [] v2;
-
- return 0;
-}
+++ /dev/null
-// { dg-do run }
-// { dg-require-effective-target offload_device }
-
-#include <stdlib.h>
-
-struct typeX
-{
- int a;
-};
-
-class typeY
-{
-public:
- int foo () { return a^0x01; }
- int a;
-};
-
-#pragma omp declare target
-struct typeX varX;
-class typeY varY;
-#pragma omp end declare target
-
-int main ()
-{
- varX.a = 0;
- varY.a = 0;
-
- #pragma omp target
- {
- varX.a = 100;
- varY.a = 100;
- }
-
- if (varX.a != 0 || varY.a != 0)
- abort ();
-
- #pragma omp target update from(varX, varY)
-
- if (varX.a != 100 || varY.a != 100)
- abort ();
-
- return 0;
-}
--- /dev/null
+// { dg-do run }
+
+#include <omp.h>
+
+#define EPS 0.000001
+#define N 1000
+
+extern "C" void abort (void);
+
+/* Fill the first n elements of a1 with values of magnitude 0.01 whose sign
+   alternates (negative at index 0), and the first n elements of a2 with
+   their own index.  */
+void init (float *a1, float *a2, int n)
+{
+  for (int idx = 0; idx < n; ++idx)
+    {
+      /* Even indices get the negative sign, odd indices the positive.  */
+      const int sign = (idx % 2 == 0) ? -1 : 1;
+
+      a1[idx] = sign * 0.01;
+      a2[idx] = idx;
+    }
+}
+
+/* Abort as soon as any of the first n element pairs of a and b differ by
+   more than EPS in either direction.  */
+void check (float *a, float *b, int n)
+{
+  int idx = 0;
+
+  while (idx < n)
+    {
+      const bool a_exceeds = a[idx] - b[idx] > EPS;
+      const bool b_exceeds = b[idx] - a[idx] > EPS;
+
+      if (a_exceeds || b_exceeds)
+	abort ();
+
+      ++idx;
+    }
+}
+
+/* Host reference: store the element-wise product of v1 and v2 into p for
+   the first n elements.  */
+void vec_mult_ref (float *&p, float *&v1, float *&v2, int n)
+{
+  int idx = 0;
+
+  while (idx < n)
+    {
+      p[idx] = v1[idx] * v2[idx];
+      ++idx;
+    }
+}
+
+/* Offloaded element-wise product.  The target construct maps v1[0:n] and
+   v2[0:n] to the device and p[0:n] back from it; the loop itself runs as a
+   parallel for inside the target region.  The pointers are taken by
+   reference.  */
+void vec_mult (float *&p, float *&v1, float *&v2, int n)
+{
+ #pragma omp target map(to: v1[0:n], v2[:n]) map(from: p[0:n])
+ #pragma omp parallel for
+ for (int i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+/* Compute the product of two N-element vectors once on the host and once
+   through the target region, then compare the two results.  */
+int main ()
+{
+  float *expected = new float [N];
+  float *actual = new float [N];
+  float *lhs = new float [N];
+  float *rhs = new float [N];
+
+  init (lhs, rhs, N);
+
+  /* Host reference first, offloaded version second.  */
+  vec_mult_ref (expected, lhs, rhs, N);
+  vec_mult (actual, lhs, rhs, N);
+
+  check (expected, actual, N);
+
+  delete [] expected;
+  delete [] actual;
+  delete [] lhs;
+  delete [] rhs;
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+/* The enclosing target data region maps A[0:4].  The nested target region
+   maps A[0:4] again together with the section p[7:20], where p points at
+   A[0].  Inside the region A[2] and p[8] (the same element as A[8]) are
+   set to 777, and the host then expects to read 777 from both.  */
+void foo ()
+{
+ int A[30], *p;
+ #pragma omp target data map(A[0:4])
+ {
+ p = &A[0];
+ #pragma omp target map(p[7:20]) map(A[0:4])
+ {
+ A[2] = 777;
+ p[8] = 777;
+ }
+ }
+
+ if (A[2] != 777 || A[8] != 777)
+ abort ();
+}
+
+/* Test driver: run foo, which aborts on failure, and report success.  */
+int main ()
+{
+ foo ();
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+/* The enclosing target data region maps A[0:10].  The nested target region
+   maps A[0:10] again together with the section p[3:7], where p points at
+   A[0], so the section covers indices 3 through 9 of A.  Inside the region
+   A[8] is first set to 777 and then overwritten through p[8] with 999; the
+   host expects A[2] == 777 and A[8] == 999.  */
+void foo ()
+{
+ int A[30], *p;
+ #pragma omp target data map(A[0:10])
+ {
+ p = &A[0];
+ #pragma omp target map(p[3:7]) map(A[0:10])
+ {
+ A[2] = 777;
+ A[8] = 777;
+ p[8] = 999;
+ }
+ }
+
+ if (A[2] != 777 || A[8] != 999)
+ abort ();
+}
+
+/* Test driver: run foo, which aborts on failure, and report success.  */
+int main ()
+{
+ foo ();
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-additional-options "-DCHUNKSZ=5000" { target { ! run_expensive_tests } } } */
+/* { dg-additional-options "-DCHUNKSZ=1000" { target run_expensive_tests } } */
+
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 100000
+
+float Y[N];
+float Z[N];
+
+#pragma omp declare target
+/* Return the negation of a.  Declared inside a declare target region; it
+   is called both from the host reference loop and from the target region
+   in pipedF.  */
+float F (float a)
+{
+  const float negated = -a;
+  return negated;
+}
+#pragma omp end declare target
+
+/* Host reference: apply F in place to every element of Y.  */
+void pipedF_ref ()
+{
+  int idx = 0;
+
+  while (idx < N)
+    {
+      Y[idx] = F (Y[idx]);
+      idx++;
+    }
+}
+
+/* CHUNKSZ is normally supplied by the dg-additional-options lines at the
+   top of this file.  Provide a fallback so the file also compiles when it
+   is built without those options.  */
+#ifndef CHUNKSZ
+#define CHUNKSZ 5000
+#endif
+
+/* Apply F in place to Z, CHUNKSZ elements at a time.  Each chunk gets its
+   own task wrapping a target region that maps just that chunk of Z and
+   processes it with a parallel for.  The taskwait at the end waits for all
+   the chunk tasks before returning.
+
+   The inner loop always runs a full CHUNKSZ elements, so N must be a
+   multiple of CHUNKSZ; that holds for N = 100000 with the 5000 and 1000
+   values used by this test.  */
+void pipedF ()
+{
+  int i, C;
+  for (C = 0; C < N; C += CHUNKSZ)
+    {
+      #pragma omp task
+      #pragma omp target map(Z[C:CHUNKSZ])
+      #pragma omp parallel for
+      for (i = C; i < C + CHUNKSZ; i++)
+	Z[i] = F (Z[i]);
+    }
+  #pragma omp taskwait
+}
+
+/* Give Y and Z identical contents: element i of both is 0.1 * i, rounded
+   to float.  */
+void init ()
+{
+  int idx;
+
+  for (idx = 0; idx < N; idx++)
+    {
+      Z[idx] = 0.1 * idx;
+      /* Y takes the already-rounded float stored in Z.  */
+      Y[idx] = Z[idx];
+    }
+}
+
+/* Compare Y against Z element by element.  The error is relative to Z[i],
+   or Y[i] itself where Z[i] is zero; abort when its magnitude exceeds
+   EPS.  */
+void check ()
+{
+  int idx;
+
+  for (idx = 0; idx < N; idx++)
+    {
+      float err;
+
+      if (Z[idx] == 0.0)
+	err = Y[idx];
+      else
+	err = (Y[idx] - Z[idx]) / Z[idx];
+
+      /* Take the magnitude.  */
+      if (!(err > 0))
+	err = -err;
+
+      if (err > EPS)
+	abort ();
+    }
+}
+
+/* Test driver: fill Y and Z with the same data, negate Y on the host and Z
+   through the chunked target tasks, then compare the two arrays.  */
+int main ()
+{
+ init ();
+
+ pipedF_ref ();
+ pipedF ();
+
+ check ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 10000
+
+#pragma omp declare target
+/* Fill the first n elements: a[i] = 0.1 * i and b[i] = 0.01 * i * i.
+   Declared inside a declare target region; called from the host reference
+   and from the first target region in vec_mult.  */
+void init (float *a, float *b, int n)
+{
+  int idx = 0;
+
+  while (idx < n)
+    {
+      a[idx] = 0.1 * idx;
+      b[idx] = 0.01 * idx * idx;
+      idx++;
+    }
+}
+#pragma omp end declare target
+
+/* Host reference: allocate two n-element scratch vectors, fill them with
+   init, store their element-wise product into p and release the scratch
+   storage again.  */
+void vec_mult_ref (float *p, int n)
+{
+  float *lhs = (float *) malloc (n * sizeof (float));
+  float *rhs = (float *) malloc (n * sizeof (float));
+  int idx;
+
+  init (lhs, rhs, n);
+
+  for (idx = 0; idx < n; idx++)
+    p[idx] = lhs[idx] * rhs[idx];
+
+  free (lhs);
+  free (rhs);
+}
+
+/* Offloaded version of vec_mult_ref, split into two dependent tasks.
+
+   The first task (depend(out: v1, v2)) runs a target region that maps the
+   pointers v1 and v2, allocates both vectors with malloc inside the region
+   and fills them with init.  The second task (depend(in: v1, v2)) runs a
+   target region that maps the pointers to the device and p[0:n] back from
+   it, computes the product with a parallel for, and frees the vectors.
+   Both regions abort if they find themselves running on the initial
+   device.  The final taskwait waits for both tasks.  */
+void vec_mult(float *p, int n)
+{
+ float *v1, *v2;
+ int i;
+
+ #pragma omp task shared(v1, v2) depend(out: v1, v2)
+ #pragma omp target map(v1, v2)
+ {
+ if (omp_is_initial_device ())
+ abort ();
+
+ v1 = (float *) malloc (n * sizeof (float));
+ v2 = (float *) malloc (n * sizeof (float));
+
+ init (v1, v2, n);
+ }
+
+ #pragma omp task shared(v1, v2) depend(in: v1, v2)
+ #pragma omp target map(to: v1, v2) map(from: p[0:n])
+ {
+ if (omp_is_initial_device ())
+ abort ();
+
+ #pragma omp parallel for
+ for (i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+
+ free (v1);
+ free (v2);
+ }
+
+ #pragma omp taskwait
+}
+
+/* Compare b against a over the first n elements.  The error is relative to
+   a[i], or b[i] itself where a[i] is zero; abort when its magnitude
+   exceeds EPS.  */
+void check (float *a, float *b, int n)
+{
+  int idx;
+
+  for (idx = 0; idx < n; idx++)
+    {
+      float err;
+
+      if (a[idx] == 0.0)
+	err = b[idx];
+      else
+	err = (b[idx] - a[idx]) / a[idx];
+
+      /* Take the magnitude.  */
+      if (!(err > 0))
+	err = -err;
+
+      if (err > EPS)
+	abort ();
+    }
+}
+
+/* Compute the N-element product once on the host and once through the
+   dependent target tasks, then compare the two results.  */
+int main ()
+{
+  float *expected = (float *) malloc (N * sizeof (float));
+  float *actual = (float *) malloc (N * sizeof (float));
+
+  /* Host reference first, offloaded version second.  */
+  vec_mult_ref (expected, N);
+  vec_mult (actual, N);
+
+  check (expected, actual, N);
+
+  free (expected);
+  free (actual);
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define THRESHOLD 20
+
+#pragma omp declare target
+/* Naive recursive Fibonacci: 0 for n <= 0, 1 for n == 1, otherwise the sum
+   of the two preceding values.  Declared inside a declare target region;
+   fib_wrapper calls it from a target construct.  */
+int fib (int n)
+{
+  if (n <= 0)
+    return 0;
+
+  if (n == 1)
+    return 1;
+
+  return fib (n - 1) + fib (n - 2);
+}
+#pragma omp end declare target
+
+/* Compute fib (n) inside a target construct whose if clause is true only
+   when n exceeds THRESHOLD, and return the result.  */
+int fib_wrapper (int n)
+{
+ int x = 0;
+
+ #pragma omp target if(n > THRESHOLD)
+ x = fib (n);
+
+ return x;
+}
+
+/* Compare the direct and the wrapped Fibonacci for one argument below
+   THRESHOLD (15, if clause false) and one above it (25, if clause
+   true).  */
+int main ()
+{
+  static const int inputs[] = { 15, 25 };
+  unsigned int k;
+
+  for (k = 0; k < sizeof (inputs) / sizeof (inputs[0]); k++)
+    if (fib (inputs[k]) != fib_wrapper (inputs[k]))
+      abort ();
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.000001
+#define N 100000
+
+/* Global vectors placed inside a declare target region.  In this file v1
+   and v2 are the inputs, p1 receives the host reference result and p2 the
+   result computed in the target region.  */
+#pragma omp declare target
+float p1[N], p2[N], v1[N], v2[N];
+#pragma omp end declare target
+
+/* Fill v1 with values of magnitude 0.01 whose sign alternates (negative at
+   index 0) and v2 with the element index.  */
+void init ()
+{
+  int idx;
+
+  for (idx = 0; idx < N; idx++)
+    {
+      /* Even indices get the negative sign, odd indices the positive.  */
+      const int sign = (idx % 2 == 0) ? -1 : 1;
+
+      v1[idx] = sign * 0.01;
+      v2[idx] = idx;
+    }
+}
+
+/* Abort as soon as any element pair of p1 and p2 differs by more than EPS
+   in either direction.  */
+void check ()
+{
+  int idx = 0;
+
+  while (idx < N)
+    {
+      const int p1_exceeds = p1[idx] - p2[idx] > EPS;
+      const int p2_exceeds = p2[idx] - p1[idx] > EPS;
+
+      if (p1_exceeds || p2_exceeds)
+	abort ();
+
+      idx++;
+    }
+}
+
+/* Host reference: store the element-wise product of v1 and v2 into p1.  */
+void vec_mult_ref ()
+{
+  int idx = 0;
+
+  while (idx < N)
+    {
+      p1[idx] = v1[idx] * v2[idx];
+      idx++;
+    }
+}
+
+/* Offloaded product using the declare-target globals.  The host values of
+   v1 and v2 are pushed with "target update to", the product is computed
+   into p2 by a parallel for inside a target region, and p2 is pulled back
+   with "target update from".  */
+void vec_mult ()
+{
+ int i;
+
+ #pragma omp target update to(v1, v2)
+
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p2[i] = v1[i] * v2[i];
+
+ #pragma omp target update from(p2)
+}
+
+/* Test driver: initialise the inputs, compute the product on the host
+   (into p1) and through the target region (into p2), then compare.  */
+int main ()
+{
+ init ();
+
+ vec_mult_ref ();
+ vec_mult ();
+
+ check ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-additional-options "-DTESTITERS=20" { target { ! run_expensive_tests } } } */
+
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 1000
+#ifndef TESTITERS
+#define TESTITERS N
+#endif
+
+#pragma omp declare target
+float Q[N][N];
+/* Return the product of the two entries of Q that mirror each other across
+   the diagonal: Q[i][k] and Q[k][i].  */
+float Pfun (const int i, const int k)
+{
+  const float upper = Q[i][k];
+  const float lower = Q[k][i];
+
+  return upper * lower;
+}
+#pragma omp end declare target
+
+/* Set every entry of Q to 0.001 * row * column.  */
+void init ()
+{
+  int row, col;
+
+  for (row = 0; row < N; row++)
+    {
+      float *line = Q[row];
+
+      for (col = 0; col < N; col++)
+	line[col] = 0.001 * row * col;
+    }
+}
+
+/* Host reference: return the sum of Pfun (i, k) over all i in [0, N).  */
+float accum_ref (int k)
+{
+  float sum = 0.0;
+  int row = 0;
+
+  while (row < N)
+    {
+      sum += Pfun (row, k);
+      row++;
+    }
+
+  return sum;
+}
+
+/* Offloaded counterpart of accum_ref: the same sum of Pfun (i, k) over
+   i in [0, N), computed by a parallel for with a + reduction on tmp inside
+   a target region.  */
+float accum (int k)
+{
+ int i;
+ float tmp = 0.0;
+
+ #pragma omp target
+ #pragma omp parallel for reduction(+:tmp)
+ for (i = 0; i < N; i++)
+ tmp += Pfun (i, k);
+
+ return tmp;
+}
+
+/* Abort when a and b disagree.  The error is relative to b, or a itself
+   where b is zero, and its magnitude must not exceed EPS.  */
+void check (float a, float b)
+{
+  float err;
+
+  if (b == 0.0)
+    err = a;
+  else
+    err = (a - b) / b;
+
+  /* Take the magnitude.  */
+  if (!(err > 0))
+    err = -err;
+
+  if (err > EPS)
+    abort ();
+}
+
+/* Test driver: initialise Q on the host, push it with "target update to",
+   then compare the offloaded and the host accumulation for each k in
+   [0, TESTITERS).  */
+int main ()
+{
+ int i;
+
+ init ();
+
+ #pragma omp target update to(Q)
+
+ for (i = 0; i < TESTITERS; i++)
+ check (accum (i), accum_ref (i));
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run { target vect_simd_clones } } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 10000
+#define M 1024
+
+/* Q and Pfun are placed inside a declare target region.  Pfun also carries
+   a declare simd directive with i uniform, k linear and notinbranch; it
+   returns the product of Q[i][k] and Q[k][i].  */
+#pragma omp declare target
+float Q[N][N];
+#pragma omp declare simd uniform(i) linear(k) notinbranch
+float Pfun (const int i, const int k)
+{
+ return Q[i][k] * Q[k][i];
+}
+#pragma omp end declare target
+
+/* Set every entry of Q to 0.001 * row * column.  */
+void init ()
+{
+  int row, col;
+
+  for (row = 0; row < N; row++)
+    {
+      float *line = Q[row];
+
+      for (col = 0; col < N; col++)
+	line[col] = 0.001 * row * col;
+    }
+}
+
+/* Host reference: for each i in [0, N) sum Pfun (i, k) over k in [0, M)
+   into a per-row partial sum, and return the total of those partial
+   sums.  */
+float accum_ref ()
+{
+  float total = 0.0;
+  int row;
+
+  for (row = 0; row < N; row++)
+    {
+      float partial = 0.0;
+      int col = 0;
+
+      while (col < M)
+	{
+	  partial += Pfun (row, col);
+	  col++;
+	}
+
+      total += partial;
+    }
+
+  return total;
+}
+
+/* Offloaded counterpart of accum_ref.  The outer loop over i runs as a
+   parallel for with a + reduction on tmp inside a target region; the inner
+   loop over k is a simd loop with a + reduction on the per-row partial sum
+   tmp1.  */
+float accum ()
+{
+ int i, k;
+ float tmp = 0.0;
+
+ #pragma omp target
+ #pragma omp parallel for reduction(+:tmp)
+ for (i = 0; i < N; i++)
+ {
+ float tmp1 = 0.0;
+
+ #pragma omp simd reduction(+:tmp1)
+ for (k = 0; k < M; k++)
+ tmp1 += Pfun(i,k);
+
+ tmp += tmp1;
+ }
+
+ return tmp;
+}
+
+/* Abort when a and b disagree.  The error is relative to b, or a itself
+   where b is zero, and its magnitude must not exceed EPS.  */
+void check (float a, float b)
+{
+  float err;
+
+  if (b == 0.0)
+    err = a;
+  else
+    err = (a - b) / b;
+
+  /* Take the magnitude.  */
+  if (!(err > 0))
+    err = -err;
+
+  if (err > EPS)
+    abort ();
+}
+
+/* Test driver: initialise Q on the host, push it with "target update to",
+   then compare the offloaded accumulation against the host reference.  */
+int main ()
+{
+ init ();
+
+ #pragma omp target update to(Q)
+
+ check (accum (), accum_ref ());
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+/* Exercise the if clause on target and target data with the condition
+   a > 200 && a < 400, for a = 100, 300 and 500 in turn.
+
+   Each round records in c whether the conditional target region ran on the
+   initial device, then runs an unconditional target region nested in a
+   conditional "target data map(to: b)" region.  That region sets b to 100
+   and records in d whether it ran on the initial device.
+
+   When the condition is false (a = 100 and a = 500) the checks expect
+   c != 0, d == 0 and the host b to read 100.  When it is true (a = 300)
+   they expect c == 0, d == 0 and the host b to still read 0.  */
+int main ()
+{
+ int a = 100;
+ int b = 0;
+ int c, d;
+
+ #pragma omp target if(a > 200 && a < 400)
+ c = omp_is_initial_device ();
+
+ #pragma omp target data map(to: b) if(a > 200 && a < 400)
+ #pragma omp target
+ {
+ b = 100;
+ d = omp_is_initial_device ();
+ }
+
+ if (b != 100 || !c || d)
+ abort ();
+
+ a += 200;
+ b = 0;
+
+ #pragma omp target if(a > 200 && a < 400)
+ c = omp_is_initial_device ();
+
+ #pragma omp target data map(to: b) if(a > 200 && a < 400)
+ #pragma omp target
+ {
+ b = 100;
+ d = omp_is_initial_device ();
+ }
+
+ if (b != 0 || c || d)
+ abort ();
+
+ a += 200;
+ b = 0;
+
+ #pragma omp target if(a > 200 && a < 400)
+ c = omp_is_initial_device ();
+
+ #pragma omp target data map(to: b) if(a > 200 && a < 400)
+ #pragma omp target
+ {
+ b = 100;
+ d = omp_is_initial_device ();
+ }
+
+ if (b != 100 || !c || d)
+ abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#define N 10
+
+/* Launch one target region per device number 0 .. N-1 and record in
+   offload[i] whether region i ran on the initial device.  The checks
+   expect device numbers below omp_get_num_devices () to run off the
+   initial device and the remaining ones to run on it.  */
+int main ()
+{
+  int i;
+  int offload[N];
+  int num = omp_get_num_devices ();
+
+  /* offload[] only has N slots.  Clamp the device count so the first
+     verification loop below cannot index past the end of the array when
+     more than N devices are available.  */
+  if (num > N)
+    num = N;
+
+  #pragma omp parallel for
+  for (i = 0; i < N; i++)
+    #pragma omp target device(i) map(from: offload[i:1])
+      offload[i] = omp_is_initial_device ();
+
+  /* Existing device numbers: the region must not have run on the initial
+     device.  */
+  for (i = 0; i < num; i++)
+    if (offload[i])
+      abort ();
+
+  /* Device numbers beyond the available devices: the region must have run
+     on the initial device.  */
+  for (i = num; i < N; i++)
+    if (!offload[i])
+      abort ();
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+/* Check where a target region with no device clause runs.  The first
+   region is expected to run off the initial device.  The default device is
+   then set to omp_get_num_devices (), and the second region is expected to
+   run on the initial device.  */
+int main ()
+{
+  int res;
+
+  #pragma omp target
+    res = omp_is_initial_device ();
+
+  if (res)
+    abort ();
+
+  /* Select a device number one past the last available device.  */
+  omp_set_default_device (omp_get_num_devices ());
+
+  #pragma omp target
+    res = omp_is_initial_device ();
+
+  if (!res)
+    abort ();
+
+  return 0;
+}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define N 100000
-
-void init (int *a1, int *a2)
-{
- int i, s = -1;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void check (int *a, int *b)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] != b[i])
- abort ();
-}
-
-void vec_mult_ref (int *p)
-{
- int i;
- int v1[N], v2[N];
-
- init (v1, v2);
-
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void vec_mult (int *p)
-{
- int i;
- int v1[N], v2[N];
-
- init (v1, v2);
-
- #pragma omp target map(p[0:N])
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-int main ()
-{
- int p1[N], p2[N];
- int v1[N], v2[N];
-
- init (v1, v2);
-
- vec_mult_ref (p1);
- vec_mult (p2);
-
- check (p1, p2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define N 100000
-
-void init (char *a1, char *a2)
-{
- char s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void check (char *a, char *b)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] != b[i])
- abort ();
-}
-
-void vec_mult_ref (char *p)
-{
- int i;
- char v1[N], v2[N];
-
- init (v1, v2);
-
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void vec_mult (char *p)
-{
- int i;
- char v1[N], v2[N];
-
- init (v1, v2);
-
- #pragma omp target map(from: p[0:N])
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-int main ()
-{
- char p1[N], p2[N];
- char v1[N], v2[N];
-
- init (v1, v2);
-
- vec_mult_ref (p1);
- vec_mult (p2);
-
- check (p1, p2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define N 100000
-
-void init (long long *a1, long long *a2)
-{
- long long s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void check (long long *a, long long *b)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] != b[i])
- abort ();
-}
-
-void vec_mult_ref (long long *p)
-{
- int i;
- long long v1[N], v2[N];
-
- init (v1, v2);
-
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void vec_mult (long long *p)
-{
- int i;
- long long v1[N], v2[N];
-
- init (v1, v2);
-
- #pragma omp target map(v1, v2, p[0:N])
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-int main ()
-{
- long long p1[N], p2[N];
- long long v1[N], v2[N];
-
- init (v1, v2);
-
- vec_mult_ref (p1);
- vec_mult (p2);
-
- check (p1, p2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define EPS 0.000001
-#define N 100000
-
-void init (double *a1, double *a2)
-{
- double s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void check (double *a, double *b)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
- abort ();
-}
-
-void vec_mult_ref (double *p, double *v1, double *v2)
-{
- int i;
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void vec_mult (double *p, double *v1, double *v2)
-{
- int i;
- #pragma omp target map(to: v1[0:N], v2[:N]) map(from: p[0:N])
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-int main ()
-{
- double p1[N], p2[N];
- double v1[N], v2[N];
-
- init (v1, v2);
-
- vec_mult_ref (p1, v1, v2);
- vec_mult (p2, v1, v2);
-
- check (p1, p2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-/* { dg-require-effective-target offload_device } */
-
-#include <omp.h>
-#include <stdlib.h>
-
-#define EPS 0.000001
-#define N 100000
-#define THRESHOLD1 10000
-#define THRESHOLD2 1000
-
-void init (float *a1, float *a2)
-{
- float s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void check (float *a, float *b)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
- abort ();
-}
-
-void vec_mult_ref (float *p, float *v1, float *v2)
-{
- int i;
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void vec_mult (float *p, float *v1, float *v2)
-{
- int i;
- #pragma omp target if(N > THRESHOLD1) map(to: v1[0:N], v2[:N]) \
- map(from: p[0:N])
- {
- if (omp_is_initial_device ())
- abort ();
-
- #pragma omp parallel for if(N > THRESHOLD2)
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
- }
-}
-
-int main ()
-{
- float p1[N], p2[N];
- float v1[N], v2[N];
-
- init (v1, v2);
-
- vec_mult_ref (p1, v1, v2);
- vec_mult (p2, v1, v2);
-
- check (p1, p2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-const int MAX = 1800;
-
-void check (long long *a, long long *b, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] != b[i])
- abort ();
-}
-
-void init (long long *a1, long long *a2, int N)
-{
- long long s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void vec_mult_ref (long long *p, long long *v1, long long *v2, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void vec_mult (long long *p, long long *v1, long long *v2, int N)
-{
- int i;
- #pragma omp target data map(to: v1[0:N], v2[:N]) map(from: p[0:N])
- #pragma omp target
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-int main ()
-{
- long long *p1 = (long long *) malloc (MAX * sizeof (long long));
- long long *p2 = (long long *) malloc (MAX * sizeof (long long));
- long long *v1 = (long long *) malloc (MAX * sizeof (long long));
- long long *v2 = (long long *) malloc (MAX * sizeof (long long));
-
- init (v1, v2, MAX);
-
- vec_mult_ref (p1, v1, v2, MAX);
- vec_mult (p2, v1, v2, MAX);
-
- check (p1, p2, MAX);
-
- free (p1);
- free (p2);
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-const int MAX = 1800;
-
-void check (char *a, char *b, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] != b[i])
- abort ();
-}
-
-void init (char *a1, char *a2, int N)
-{
- char s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void init_again (char *a1, char *a2, int N)
-{
- char s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s * 10;
- a2[i] = i;
- s = -s;
- }
-}
-
-void vec_mult_ref (char *p, char *v1, char *v2, int N)
-{
- int i;
-
- init (v1, v2, N);
-
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-
- init_again (v1, v2, N);
-
- for (i = 0; i < N; i++)
- p[i] = p[i] + (v1[i] * v2[i]);
-}
-
-void vec_mult (char *p, char *v1, char *v2, int N)
-{
- int i;
-
- init (v1, v2, N);
-
- #pragma omp target data map(from: p[0:N])
- {
- #pragma omp target map(to: v1[:N], v2[:N])
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-
- init_again (v1, v2, N);
-
- #pragma omp target map(to: v1[:N], v2[:N])
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = p[i] + (v1[i] * v2[i]);
- }
-}
-
-int main ()
-{
- char *p1 = (char *) malloc (MAX * sizeof (char));
- char *p2 = (char *) malloc (MAX * sizeof (char));
- char *v1 = (char *) malloc (MAX * sizeof (char));
- char *v2 = (char *) malloc (MAX * sizeof (char));
-
- vec_mult_ref (p1, v1, v2, MAX);
- vec_mult (p2, v1, v2, MAX);
-
- check (p1, p2, MAX);
-
- free (p1);
- free (p2);
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-const int ROWS = 5;
-const int COLS = 5;
-
-void init (int Q[][COLS], const int rows, const int cols)
-{
- int i, j;
- for (i = 0; i < rows; i++)
- for (j = 0; j < cols; j++)
- Q[i][j] = (i + 1) * 100 + (j + 1);
-}
-
-void check (int a[][COLS], int b[][COLS], const int rows, const int cols)
-{
- int i, j;
- for (i = 0; i < rows; i++)
- for (j = 0; j < cols; j++)
- if (a[i][j] != b[i][j])
- abort ();
-}
-
-void gramSchmidt_ref (int Q[][COLS], const int rows, const int cols)
-{
- int i, k;
-
- for (k = 0; k < cols; k++)
- {
- int tmp = 0;
-
- for (i = 0; i < rows; i++)
- tmp += (Q[i][k] * Q[i][k]);
-
- for (i = 0; i < rows; i++)
- Q[i][k] *= tmp;
- }
-}
-
-void gramSchmidt (int Q[][COLS], const int rows, const int cols)
-{
- int i, k;
-
- #pragma omp target data map(Q[0:rows][0:cols]) map(to:COLS)
- for (k = 0; k < cols; k++)
- {
- int tmp = 0;
-
- #pragma omp target
- #pragma omp parallel for reduction(+:tmp)
- for (i = 0; i < rows; i++)
- tmp += (Q[i][k] * Q[i][k]);
-
- #pragma omp target
- #pragma omp parallel for
- for (i = 0; i < rows; i++)
- Q[i][k] *= tmp;
- }
-}
-
-int main ()
-{
- int (*Q1)[COLS] = (int(*)[COLS]) malloc (ROWS * COLS * sizeof (int));
- int (*Q2)[COLS] = (int(*)[COLS]) malloc (ROWS * COLS * sizeof (int));
-
- init (Q1, ROWS, COLS);
- init (Q2, ROWS, COLS);
-
- gramSchmidt_ref (Q1, ROWS, COLS);
- gramSchmidt (Q2, ROWS, COLS);
-
- check (Q1, Q2, ROWS, COLS);
-
- free (Q1);
- free (Q2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define EPS 0.000001
-
-const int MAX = 1800;
-
-void check (double *a, double *b, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
- abort ();
-}
-
-void init (double *a1, double *a2, int N)
-{
- double s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void vec_mult_ref (double *p1, double *v3, double *v4, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- p1[i] = v3[i] * v4[i];
-}
-
-void foo_ref (double *p0, double *v1, double *v2, int N)
-{
- init (v1, v2, N);
- vec_mult_ref (p0, v1, v2, N);
-}
-
-void vec_mult (double *p1, double *v3, double *v4, int N)
-{
- int i;
- #pragma omp target map(to: v3[0:N], v4[:N]) map(from: p1[0:N])
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p1[i] = v3[i] * v4[i];
-}
-
-void foo (double *p0, double *v1, double *v2, int N)
-{
- init (v1, v2, N);
-
- #pragma omp target data map(to: v1[0:N], v2[:N]) map(from: p0[0:N])
- vec_mult (p0, v1, v2, N);
-}
-
-int main ()
-{
- double *p1 = (double *) malloc (MAX * sizeof (double));
- double *p2 = (double *) malloc (MAX * sizeof (double));
- double *v1 = (double *) malloc (MAX * sizeof (double));
- double *v2 = (double *) malloc (MAX * sizeof (double));
-
- foo_ref (p1, v1, v2, MAX);
- foo (p2, v1, v2, MAX);
-
- check (p1, p2, MAX);
-
- free (p1);
- free (p2);
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-/* { dg-require-effective-target offload_device } */
-
-#include <stdlib.h>
-#include <omp.h>
-
-#define EPS 0.000001
-#define THRESHOLD 1000
-
-const int MAX = 1800;
-
-void check (float *a, float *b, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
- abort ();
-}
-
-void init (float *a1, float *a2, int N)
-{
- float s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void init_again (float *a1, float *a2, int N)
-{
- float s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s * 10;
- a2[i] = i;
- s = -s;
- }
-}
-
-void vec_mult_ref (float *p, float *v1, float *v2, int N)
-{
- int i;
-
- init (v1, v2, N);
-
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-
- init_again (v1, v2, N);
-
- for (i = 0; i < N; i++)
- p[i] = p[i] + (v1[i] * v2[i]);
-}
-
-void vec_mult (float *p, float *v1, float *v2, int N)
-{
- int i;
-
- init (v1, v2, N);
-
- #pragma omp target data if(N > THRESHOLD) map(from: p[0:N])
- {
- #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
- {
- if (omp_is_initial_device ())
- abort;
-
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
- }
-
- init_again (v1, v2, N);
-
- #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
- {
- if (omp_is_initial_device ())
- abort ();
-
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = p[i] + (v1[i] * v2[i]);
- }
- }
-}
-
-int main ()
-{
- float *p1 = (float *) malloc (MAX * sizeof (float));
- float *p2 = (float *) malloc (MAX * sizeof (float));
- float *v1 = (float *) malloc (MAX * sizeof (float));
- float *v2 = (float *) malloc (MAX * sizeof (float));
-
- vec_mult_ref (p1, v1, v2, MAX);
- vec_mult (p2, v1, v2, MAX);
-
- check (p1, p2, MAX);
-
- free (p1);
- free (p2);
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-/* { dg-require-effective-target offload_device } */
-
-#include <stdlib.h>
-#include <omp.h>
-
-#define THRESHOLD 1000
-
-const int MAX = 1800;
-
-void check (short *a, short *b, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] != b[i])
- abort ();
-}
-
-void init (short *a1, short *a2, int N)
-{
- short s = -1;
- int i;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void vec_mult_ref (short *p, short *v1, short *v2, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void vec_mult (short *p, short *v1, short *v2, int N)
-{
- int i;
- #pragma omp target data map(from: p[0:N])
- #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
- {
- if (omp_is_initial_device ())
- abort ();
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
- }
-}
-
-int main ()
-{
- short *p1 = (short *) malloc (MAX * sizeof (short));
- short *p2 = (short *) malloc (MAX * sizeof (short));
- short *v1 = (short *) malloc (MAX * sizeof (short));
- short *v2 = (short *) malloc (MAX * sizeof (short));
-
- init (v1, v2, MAX);
-
- vec_mult_ref (p1, v1, v2, MAX);
- vec_mult (p2, v1, v2, MAX);
-
- check (p1, p2, MAX);
-
- free (p1);
- free (p2);
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-const int MAX = 1800;
-
-void check (int *a, int *b, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] != b[i])
- abort ();
-}
-
-void init (int *a1, int *a2, int N)
-{
- int i, s = -1;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-void init_again (int *a1, int *a2, int N)
-{
- int i, s = -1;
- for (i = 0; i < N; i++)
- {
- a1[i] = s * 10;
- a2[i] = i;
- s = -s;
- }
-}
-
-void vec_mult_ref (int *p, int *v1, int *v2, int N)
-{
- int i;
-
- init (v1, v2, MAX);
-
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-
- init_again (v1, v2, N);
-
- for (i = 0; i < N; i++)
- p[i] = p[i] + (v1[i] * v2[i]);
-}
-
-void vec_mult (int *p, int *v1, int *v2, int N)
-{
- int i;
-
- init (v1, v2, MAX);
-
- #pragma omp target data map(to: v1[:N], v2[:N]) map(from: p[0:N])
- {
- #pragma omp target
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-
- init_again (v1, v2, N);
-
- #pragma omp target update to(v1[:N], v2[:N])
-
- #pragma omp target
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = p[i] + (v1[i] * v2[i]);
- }
-}
-
-int main ()
-{
- int *p1 = (int *) malloc (MAX * sizeof (int));
- int *p2 = (int *) malloc (MAX * sizeof (int));
- int *v1 = (int *) malloc (MAX * sizeof (int));
- int *v2 = (int *) malloc (MAX * sizeof (int));
-
- vec_mult_ref (p1, v1, v2, MAX);
- vec_mult (p2, v1, v2, MAX);
-
- check (p1, p2, MAX);
-
- free (p1);
- free (p2);
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-const int MAX = 1800;
-
-void check (int *a, int *b, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- if (a[i] != b[i])
- abort ();
-}
-
-void init (int *a1, int *a2, int N)
-{
- int i, s = -1;
- for (i = 0; i < N; i++)
- {
- a1[i] = s;
- a2[i] = i;
- s = -s;
- }
-}
-
-int maybe_init_again (int *a, int N)
-{
- int i;
- for (i = 0; i < N; i++)
- a[i] = i;
- return 1;
-}
-
-void vec_mult_ref (int *p, int *v1, int *v2, int N)
-{
- int i;
-
- init (v1, v2, N);
-
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-
- maybe_init_again (v1, N);
- maybe_init_again (v2, N);
-
- for (i = 0; i < N; i++)
- p[i] = p[i] + (v1[i] * v2[i]);
-}
-
-void vec_mult (int *p, int *v1, int *v2, int N)
-{
- int i;
-
- init (v1, v2, N);
-
- #pragma omp target data map(to: v1[:N], v2[:N]) map(from: p[0:N])
- {
- int changed;
-
- #pragma omp target
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = v1[i] * v2[i];
-
- changed = maybe_init_again (v1, N);
- #pragma omp target update if (changed) to(v1[:N])
-
- changed = maybe_init_again (v2, N);
- #pragma omp target update if (changed) to(v2[:N])
-
- #pragma omp target
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p[i] = p[i] + (v1[i] * v2[i]);
- }
-}
-
-int main ()
-{
- int *p = (int *) malloc (MAX * sizeof (int));
- int *p1 = (int *) malloc (MAX * sizeof (int));
- int *v1 = (int *) malloc (MAX * sizeof (int));
- int *v2 = (int *) malloc (MAX * sizeof (int));
-
- vec_mult_ref (p, v1, v2, MAX);
- vec_mult (p1, v1, v2, MAX);
-
- check (p, p1, MAX);
-
- free (p);
- free (p1);
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define THRESHOLD 20
-
-#pragma omp declare target
-int fib (int n)
-{
- if (n <= 0)
- return 0;
- else if (n == 1)
- return 1;
- else
- return fib (n - 1) + fib (n - 2);
-}
-#pragma omp end declare target
-
-int fib_wrapper (int n)
-{
- int x = 0;
-
- #pragma omp target if(n > THRESHOLD)
- x = fib (n);
-
- return x;
-}
-
-int main ()
-{
- if (fib (15) != fib_wrapper (15))
- abort ();
- if (fib (25) != fib_wrapper (25))
- abort ();
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define EPS 0.000001
-#define N 100000
-
-#pragma omp declare target
-float p1[N], p2[N], v1[N], v2[N];
-#pragma omp end declare target
-
-void init ()
-{
- int i, s = -1;
- for (i = 0; i < N; i++)
- {
- v1[i] = s * 0.01;
- v2[i] = i;
- s = -s;
- }
-}
-
-void check ()
-{
- int i;
- for (i = 0; i < N; i++)
- if (p1[i] - p2[i] > EPS || p2[i] - p1[i] > EPS)
- abort ();
-}
-
-void vec_mult_ref ()
-{
- int i;
- for (i = 0; i < N; i++)
- p1[i] = v1[i] * v2[i];
-}
-
-void vec_mult ()
-{
- int i;
-
- #pragma omp target update to(v1, v2)
-
- #pragma omp target
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- p2[i] = v1[i] * v2[i];
-
- #pragma omp target update from(p2)
-}
-
-int main ()
-{
- init ();
-
- vec_mult_ref ();
- vec_mult ();
-
- check ();
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-/* { dg-additional-options "-DTESTITERS=20" { target { ! run_expensive_tests } } } */
-
-#include <stdlib.h>
-
-#define EPS 0.00001
-#define N 1000
-#ifndef TESTITERS
-#define TESTITERS N
-#endif
-
-#pragma omp declare target
-float Q[N][N];
-float Pfun (const int i, const int k)
-{
- return Q[i][k] * Q[k][i];
-}
-#pragma omp end declare target
-
-void init ()
-{
- int i, j;
- for (i = 0; i < N; i++)
- for (j = 0; j < N; j++)
- Q[i][j] = 0.001 * i * j;
-}
-
-float accum_ref (int k)
-{
- int i;
- float tmp = 0.0;
-
- for (i = 0; i < N; i++)
- tmp += Pfun (i, k);
-
- return tmp;
-}
-
-float accum (int k)
-{
- int i;
- float tmp = 0.0;
-
- #pragma omp target
- #pragma omp parallel for reduction(+:tmp)
- for (i = 0; i < N; i++)
- tmp += Pfun (i, k);
-
- return tmp;
-}
-
-void check (float a, float b)
-{
- float err = (b == 0.0) ? a : (a - b) / b;
- if (((err > 0) ? err : -err) > EPS)
- abort ();
-}
-
-int main ()
-{
- int i;
-
- init ();
-
- #pragma omp target update to(Q)
-
- for (i = 0; i < TESTITERS; i++)
- check (accum (i), accum_ref (i));
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run { target vect_simd_clones } } */
-/* { dg-additional-options "-msse2" { target sse2_runtime } } */
-/* { dg-additional-options "-mavx" { target avx_runtime } } */
-
-#include <stdlib.h>
-
-#define EPS 0.00001
-#define N 10000
-#define M 1024
-
-#pragma omp declare target
-float Q[N][N];
-#pragma omp declare simd uniform(i) linear(k) notinbranch
-float Pfun (const int i, const int k)
-{
- return Q[i][k] * Q[k][i];
-}
-#pragma omp end declare target
-
-void init ()
-{
- int i, j;
- for (i = 0; i < N; i++)
- for (j = 0; j < N; j++)
- Q[i][j] = 0.001 * i * j;
-}
-
-float accum_ref ()
-{
- int i, k;
- float tmp = 0.0;
-
- for (i = 0; i < N; i++)
- {
- float tmp1 = 0.0;
-
- for (k = 0; k < M; k++)
- tmp1 += Pfun(i,k);
-
- tmp += tmp1;
- }
-
- return tmp;
-}
-
-float accum ()
-{
- int i, k;
- float tmp = 0.0;
-
- #pragma omp target
- #pragma omp parallel for reduction(+:tmp)
- for (i = 0; i < N; i++)
- {
- float tmp1 = 0.0;
-
- #pragma omp simd reduction(+:tmp1)
- for (k = 0; k < M; k++)
- tmp1 += Pfun(i,k);
-
- tmp += tmp1;
- }
-
- return tmp;
-}
-
-void check (float a, float b)
-{
- float err = (b == 0.0) ? a : (a - b) / b;
- if (((err > 0) ? err : -err) > EPS)
- abort ();
-}
-
-int main ()
-{
- init ();
-
- #pragma omp target update to(Q)
-
- check (accum (), accum_ref ());
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define EPS 0.0001
-#define N 1024*1024
-
-void init (float B[], float C[], int n)
-{
- int i;
- for (i = 0; i < n; i++)
- {
- B[i] = 0.1 * i;
- C[i] = 0.01 * i * i;
- }
-}
-
-float dotprod_ref (float B[], float C[], int n)
-{
- int i;
- float sum = 0.0;
-
- for (i = 0; i < n; i++)
- sum += B[i] * C[i];
-
- return sum;
-}
-
-float dotprod (float B[], float C[], int n, int block_size,
- int num_teams, int block_threads)
-{
- int i, i0;
- float sum = 0;
-
- #pragma omp target map(to: B[0:n], C[0:n])
- #pragma omp teams num_teams(num_teams) thread_limit(block_threads) \
- reduction(+:sum)
- #pragma omp distribute
- for (i0 = 0; i0 < n; i0 += block_size)
- #pragma omp parallel for reduction(+:sum)
- for (i = i0; i < ((i0 + block_size > n) ? n : i0 + block_size); i++)
- sum += B[i] * C[i];
-
- return sum;
-}
-
-void check (float a, float b)
-{
- float err = (b == 0.0) ? a : (a - b) / b;
- if (((err > 0) ? err : -err) > EPS)
- abort ();
-}
-
-int main ()
-{
- float *v1 = (float *) malloc (N * sizeof (float));
- float *v2 = (float *) malloc (N * sizeof (float));
-
- float p1, p2;
-
- init (v1, v2, N);
-
- p1 = dotprod_ref (v1, v2, N);
- p2 = dotprod (v1, v2, N, N / 8, 2, 8);
-
- check (p1, p2);
-
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define EPS 0.0001
-#define N 1024*1024
-
-void init (float B[], float C[], int n)
-{
- int i;
- for (i = 0; i < n; i++)
- {
- B[i] = 0.1 * i;
- C[i] = 0.01 * i * i;
- }
-}
-
-float dotprod_ref (float B[], float C[], int n)
-{
- int i;
- float sum = 0.0;
-
- for (i = 0; i < n; i++)
- sum += B[i] * C[i];
-
- return sum;
-}
-
-float dotprod (float B[], float C[], int n)
-{
- int i;
- float sum = 0;
-
- #pragma omp target teams map(to: B[0:n], C[0:n])
- #pragma omp distribute parallel for reduction(+:sum)
- for (i = 0; i < n; i++)
- sum += B[i] * C[i];
-
- return sum;
-}
-
-void check (float a, float b)
-{
- float err = (b == 0.0) ? a : (a - b) / b;
- if (((err > 0) ? err : -err) > EPS)
- abort ();
-}
-
-int main ()
-{
- float *v1 = (float *) malloc (N * sizeof (float));
- float *v2 = (float *) malloc (N * sizeof (float));
-
- float p1, p2;
-
- init (v1, v2, N);
-
- p1 = dotprod_ref (v1, v2, N);
- p2 = dotprod (v1, v2, N);
-
- check (p1, p2);
-
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define EPS 0.0001
-#define N 1024*1024
-
-void init (float B[], float C[], int n)
-{
- int i;
- for (i = 0; i < n; i++)
- {
- B[i] = 0.1 * i;
- C[i] = 0.01 * i * i;
- }
-}
-
-float dotprod_ref (float B[], float C[], int n)
-{
- int i;
- float sum = 0.0;
-
- for (i = 0; i < n; i++)
- sum += B[i] * C[i];
-
- return sum;
-}
-
-float dotprod (float B[], float C[], int n)
-{
- int i;
- float sum = 0;
-
- #pragma omp target map(to: B[0:n], C[0:n])
- #pragma omp teams num_teams(8) thread_limit(16)
- #pragma omp distribute parallel for reduction(+:sum) \
- dist_schedule(static, 1024) \
- schedule(static, 64)
- for (i = 0; i < n; i++)
- sum += B[i] * C[i];
-
- return sum;
-}
-
-void check (float a, float b)
-{
- float err = (b == 0.0) ? a : (a - b) / b;
- if (((err > 0) ? err : -err) > EPS)
- abort ();
-}
-
-int main ()
-{
- float *v1 = (float *) malloc (N * sizeof (float));
- float *v2 = (float *) malloc (N * sizeof (float));
-
- float p1, p2;
-
- init (v1, v2, N);
-
- p1 = dotprod_ref (v1, v2, N);
- p2 = dotprod (v1, v2, N);
-
- check (p1, p2);
-
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define EPS 0.00001
-#define N 10000
-
-void init (float *a, float *b, int n)
-{
- int i;
- for (i = 0; i < n; i++)
- {
- a[i] = 0.1 * i;
- b[i] = 0.01 * i * i;
- }
-}
-
-void vec_mult_ref (float *p, float *v1, float *v2, int n)
-{
- int i;
- for (i = 0; i < n; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void vec_mult (float *p, float *v1, float *v2, int n)
-{
- int i;
- #pragma omp target teams map(to: v1[0:n], v2[:n]) map(from: p[0:n])
- #pragma omp distribute simd
- for (i = 0; i < n; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void check (float *a, float *b, int n)
-{
- int i;
- for (i = 0 ; i < n ; i++)
- {
- float err = (a[i] == 0.0) ? b[i] : (b[i] - a[i]) / a[i];
- if (((err > 0) ? err : -err) > EPS)
- abort ();
- }
-}
-
-int main ()
-{
- float *p1 = (float *) malloc (N * sizeof (float));
- float *p2 = (float *) malloc (N * sizeof (float));
- float *v1 = (float *) malloc (N * sizeof (float));
- float *v2 = (float *) malloc (N * sizeof (float));
-
- init (v1, v2, N);
-
- vec_mult_ref (p1, v1, v2, N);
- vec_mult (p2, v1, v2, N);
-
- check (p1, p2, N);
-
- free (p1);
- free (p2);
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-#define EPS 0.00001
-#define N 10000
-
-void init (float *a, float *b, int n)
-{
- int i;
- for (i = 0; i < n; i++)
- {
- a[i] = 0.1 * i;
- b[i] = 0.01 * i * i;
- }
-}
-
-void vec_mult_ref (float *p, float *v1, float *v2, int n)
-{
- int i;
- for (i = 0; i < n; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void vec_mult (float *p, float *v1, float *v2, int n)
-{
- int i;
- #pragma omp target teams map(to: v1[0:n], v2[:n]) map(from: p[0:n])
- #pragma omp distribute parallel for simd
- for (i = 0; i < n; i++)
- p[i] = v1[i] * v2[i];
-}
-
-void check (float *a, float *b, int n)
-{
- int i;
- for (i = 0 ; i < n ; i++)
- {
- float err = (a[i] == 0.0) ? b[i] : (b[i] - a[i]) / a[i];
- if (((err > 0) ? err : -err) > EPS)
- abort ();
- }
-}
-
-int main ()
-{
- float *p1 = (float *) malloc (N * sizeof (float));
- float *p2 = (float *) malloc (N * sizeof (float));
- float *v1 = (float *) malloc (N * sizeof (float));
- float *v2 = (float *) malloc (N * sizeof (float));
-
- init (v1, v2, N);
-
- vec_mult_ref (p1, v1, v2, N);
- vec_mult (p2, v1, v2, N);
-
- check (p1, p2, N);
-
- free (p1);
- free (p2);
- free (v1);
- free (v2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-/* { dg-additional-options "-DCHUNKSZ=5000" { target { ! run_expensive_tests } } } */
-/* { dg-additional-options "-DCHUNKSZ=1000" { target run_expensive_tests } } */
-
-#include <stdlib.h>
-
-#define EPS 0.00001
-#define N 100000
-
-float Y[N];
-float Z[N];
-
-#pragma omp declare target
-float F (float a)
-{
- return -a;
-}
-#pragma omp end declare target
-
-void pipedF_ref ()
-{
- int i;
- for (i = 0; i < N; i++)
- Y[i] = F (Y[i]);
-}
-
-void pipedF ()
-{
- int i, C;
- for (C = 0; C < N; C += CHUNKSZ)
- {
- #pragma omp task
- #pragma omp target map(Z[C:CHUNKSZ])
- #pragma omp parallel for
- for (i = C; i < C + CHUNKSZ; i++)
- Z[i] = F (Z[i]);
- }
- #pragma omp taskwait
-}
-
-void init ()
-{
- int i;
- for (i = 0; i < N; i++)
- Y[i] = Z[i] = 0.1 * i;
-}
-
-void check ()
-{
- int i;
- for (i = 0; i < N; i++)
- {
- float err = (Z[i] == 0.0) ? Y[i] : (Y[i] - Z[i]) / Z[i];
- if (((err > 0) ? err : -err) > EPS)
- abort ();
- }
-}
-
-int main ()
-{
- init ();
-
- pipedF_ref ();
- pipedF ();
-
- check ();
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-/* { dg-require-effective-target offload_device } */
-
-#include <omp.h>
-#include <stdlib.h>
-
-#define EPS 0.00001
-#define N 10000
-
-#pragma omp declare target
-void init (float *a, float *b, int n)
-{
- int i;
- for (i = 0; i < n; i++)
- {
- a[i] = 0.1 * i;
- b[i] = 0.01 * i * i;
- }
-}
-#pragma omp end declare target
-
-void vec_mult_ref (float *p, float *v1, float *v2, int n)
-{
- int i;
-
- v1 = (float *) malloc (n * sizeof (float));
- v2 = (float *) malloc (n * sizeof (float));
-
- init (v1, v2, n);
-
- for (i = 0; i < n; i++)
- p[i] = v1[i] * v2[i];
-
- free (v1);
- free (v2);
-}
-
-void vec_mult (float *p, float *v1, float *v2, int n)
-{
- int i;
-
- #pragma omp task shared(v1, v2) depend(out: v1, v2)
- #pragma omp target map(v1, v2)
- {
- if (omp_is_initial_device ())
- abort ();
-
- v1 = (float *) malloc (n * sizeof (float));
- v2 = (float *) malloc (n * sizeof (float));
-
- init (v1, v2, n);
- }
-
- #pragma omp task shared(v1, v2) depend(in: v1, v2)
- #pragma omp target map(to: v1, v2) map(from: p[0:n])
- {
- if (omp_is_initial_device ())
- abort ();
-
- #pragma omp parallel for
- for (i = 0; i < n; i++)
- p[i] = v1[i] * v2[i];
-
- free (v1);
- free (v2);
- }
-}
-
-void check (float *a, float *b, int n)
-{
- int i;
- for (i = 0 ; i < n ; i++)
- {
- float err = (a[i] == 0.0) ? b[i] : (b[i] - a[i]) / a[i];
- if (((err > 0) ? err : -err) > EPS)
- abort ();
- }
-}
-
-int main ()
-{
- float *p1 = (float *) malloc (N * sizeof (float));
- float *p2 = (float *) malloc (N * sizeof (float));
- float *v1, *v2;
-
- vec_mult_ref (p1, v1, v2, N);
- vec_mult (p2, v1, v2, N);
-
- check (p1, p2, N);
-
- free (p1);
- free (p2);
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-void foo ()
-{
- int A[30], *p;
- #pragma omp target data map(A[0:4])
- {
- p = &A[0];
- #pragma omp target map(p[7:20]) map(A[0:4])
- {
- A[2] = 777;
- p[8] = 777;
- }
- }
-
- if (A[2] != 777 || A[8] != 777)
- abort ();
-}
-
-int main ()
-{
- foo ();
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-void foo ()
-{
- int A[30], *p;
- #pragma omp target data map(A[0:10])
- {
- p = &A[0];
- #pragma omp target map(p[3:7]) map(A[0:10])
- {
- A[2] = 777;
- A[8] = 777;
- p[8] = 999;
- }
- }
-
- if (A[2] != 777 || A[8] != 999)
- abort ();
-}
-
-int main ()
-{
- foo ();
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-/* { dg-require-effective-target offload_device } */
-
-#include <omp.h>
-#include <stdlib.h>
-
-int main ()
-{
- int a = 100;
- int b = 0;
- int c, d;
-
- #pragma omp target if(a > 200 && a < 400)
- c = omp_is_initial_device ();
-
- #pragma omp target data map(to: b) if(a > 200 && a < 400)
- #pragma omp target
- {
- b = 100;
- d = omp_is_initial_device ();
- }
-
- if (b != 100 || !c || d)
- abort ();
-
- a += 200;
- b = 0;
-
- #pragma omp target if(a > 200 && a < 400)
- c = omp_is_initial_device ();
-
- #pragma omp target data map(to: b) if(a > 200 && a < 400)
- #pragma omp target
- {
- b = 100;
- d = omp_is_initial_device ();
- }
-
- if (b != 0 || c || d)
- abort ();
-
- a += 200;
- b = 0;
-
- #pragma omp target if(a > 200 && a < 400)
- c = omp_is_initial_device ();
-
- #pragma omp target data map(to: b) if(a > 200 && a < 400)
- #pragma omp target
- {
- b = 100;
- d = omp_is_initial_device ();
- }
-
- if (b != 100 || !c || d)
- abort ();
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-/* { dg-require-effective-target offload_device } */
-
-#include <omp.h>
-#include <stdlib.h>
-
-#define N 10
-
-int main ()
-{
- int i;
- int offload[N];
- int num = omp_get_num_devices();
-
- #pragma omp parallel for
- for (i = 0; i < N; i++)
- #pragma omp target device(i) map(from: offload[i:1])
- offload[i] = omp_is_initial_device ();
-
- for (i = 0; i < num; i++)
- if (offload[i])
- abort ();
-
- for (i = num; i < N; i++)
- if (!offload[i])
- abort ();
-
- return 0;
-}
+++ /dev/null
-/* { dg-do run } */
-/* { dg-require-effective-target offload_device } */
-
-#include <omp.h>
-#include <stdlib.h>
-
-int main ()
-{
- int res;
- int default_device = omp_get_default_device ();
-
- #pragma omp target
- res = omp_is_initial_device ();
-
- if (res)
- abort ();
-
- omp_set_default_device (omp_get_num_devices ());
-
- #pragma omp target
- res = omp_is_initial_device ();
-
- if (!res)
- abort ();
-
- return 0;
-}
--- /dev/null
+/* { dg-do run } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define N 100
+#define OFF 32
+#define EPS 0.0000000000000001
+
+#include <stdlib.h>
+
+/* Seed the inputs: a and a_ref both receive i*i, b receives 2*i, and the
+   n + ioff entries of c receive the alternating pattern -3, 3, -3, ...  */
+void init(double *a, double *a_ref, double *b, double *c, int n, int ioff)
+{
+  int idx, sign;
+
+  for (idx = 0; idx < n; idx++)
+    {
+      const double square = idx * idx;
+
+      a[idx] = square;
+      a_ref[idx] = square;
+      b[idx] = idx + idx;
+    }
+
+  for (idx = 0, sign = -1; idx < n + ioff; idx++, sign = -sign)
+    c[idx] = sign * 3;
+}
+
+/* SIMD kernel: scale each a[i], i in [0, n), in place by
+   b[i] * c[i + *ioff].  */
+void star( double *a, double *b, double *c, int n, int *ioff )
+{
+  int k;
+
+  #pragma omp simd
+  for (k = 0; k < n; k++)
+    {
+      a[k] = a[k] * (b[k] * c[k + *ioff]);
+    }
+}
+
+/* Scalar reference for star: the same in-place update without any
+   OpenMP directive.  */
+void star_ref( double *a, double *b, double *c, int n, int *ioff )
+{
+  int k = 0;
+
+  while (k < n)
+    {
+      a[k] = a[k] * (b[k] * c[k + *ioff]);
+      k++;
+    }
+}
+
+/* Abort if any of the first N elements of a and b differ by more than
+   EPS in either direction.  */
+void check (double *a, double *b)
+{
+  int k;
+
+  for (k = 0; k < N; k++)
+    {
+      const double fwd = a[k] - b[k];
+      const double rev = b[k] - a[k];
+
+      if (fwd > EPS || rev > EPS)
+        abort ();
+    }
+}
+
+/* Run the SIMD kernel and the scalar reference on identically
+   initialised data and compare the outputs.  */
+int main ()
+{
+  double simd_out[N], scalar_out[N], factor[N], shifted[N+OFF];
+  int offset = OFF;
+
+  init(simd_out, scalar_out, factor, shifted, N, offset);
+
+  star(simd_out, factor, shifted, N, &offset);
+  star_ref(scalar_out, factor, shifted, N, &offset);
+
+  check(simd_out, scalar_out);
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run { target vect_simd_clones } } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define N 100
+#define EPS 0.0000000000000001
+
+#include <stdlib.h>
+
+/* Initialise the first n elements: a and a_ref receive i, b receives
+   N - i.  The loop is bounded by the n argument (not the N macro) so the
+   caller decides how many elements are written.  */
+void init(double *a, double *a_ref, double *b, int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    {
+      a[i] = i;
+      a_ref[i] = i;
+      b[i] = N-i;
+    }
+}
+
+/* Return a + b + fact; declared SIMD-callable with fact uniform.  */
+#pragma omp declare simd uniform(fact)
+double add1(double a, double b, double fact)
+{
+  return a + b + fact;
+}
+
+/* Return a[i] + b[i] + fact; the array bases and fact are uniform and
+   the index i is linear with step 1.  */
+#pragma omp declare simd uniform(a,b,fact) linear(i:1)
+double add2(double *a, double *b, int i, double fact)
+{
+  return a[i] + b[i] + fact;
+}
+
+/* Return *a + *b + fact; both pointers are linear with step 1 and fact
+   is uniform.  */
+#pragma omp declare simd uniform(fact) linear(a,b:1)
+double add3(double *a, double *b, double fact)
+{
+  return *a + *b + fact;
+}
+
+/* Update a[0..n) in place inside an "omp simd" loop, going through the
+   three declare-simd helpers add1, add2 and add3 in turn.  */
+void work( double *a, double *b, int n )
+{
+  int k;
+  double tmp;
+
+  #pragma omp simd private(tmp)
+  for (k = 0; k < n; k++)
+    {
+      tmp = add1(a[k], b[k], 1.0);
+      a[k] = add2(a, b, k, 1.0) + tmp;
+      a[k] = add3(&a[k], &b[k], 1.0);
+    }
+}
+
+/* Scalar reference for work: the same three-step update of a[0..n)
+   without the simd directive.  */
+void work_ref( double *a, double *b, int n )
+{
+  int k = 0;
+  double tmp;
+
+  while (k < n)
+    {
+      tmp = add1(a[k], b[k], 1.0);
+      a[k] = add2(a, b, k, 1.0) + tmp;
+      a[k] = add3(&a[k], &b[k], 1.0);
+      k++;
+    }
+}
+
+/* Abort if any of the first N elements of a and b differ by more than
+   EPS in either direction.  */
+void check (double *a, double *b)
+{
+  int k;
+
+  for (k = 0; k < N; k++)
+    {
+      const double fwd = a[k] - b[k];
+      const double rev = b[k] - a[k];
+
+      if (fwd > EPS || rev > EPS)
+        abort ();
+    }
+}
+
+
+/* Run the SIMD version and the scalar reference on identically
+   initialised data and compare the results.  */
+int main ()
+{
+  double a[N], a_ref[N], b[N];
+
+  init(a, a_ref, b, N);
+
+  work(a, b, N );
+  work_ref(a_ref, b, N );
+
+  check(a, a_ref);
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define N 100
+#define EPS 0.0000000000000001
+
+#include <stdlib.h>
+
+/* Seed the inputs: a and a_ref receive i*i with alternating sign
+   (negative for even i), b receives 2*i.  */
+void init(double *a, double *a_ref, double *b, int n)
+{
+  int idx, sign;
+
+  for (idx = 0, sign = -1; idx < n; idx++, sign = -sign)
+    {
+      const double signed_square = idx * idx * sign;
+
+      a[idx] = signed_square;
+      a_ref[idx] = signed_square;
+      b[idx] = idx + idx;
+    }
+}
+
+/* Return the sum of a[i] + b[i] over i in [0, n), accumulated in an
+   "omp simd" loop with a + reduction on sum.  */
+double work( double *a, double *b, int n )
+{
+  int k;
+  double tmp;
+  double sum = 0.0;
+
+  #pragma omp simd private(tmp) reduction(+:sum)
+  for (k = 0; k < n; k++)
+    {
+      tmp = a[k] + b[k];
+      sum = sum + tmp;
+    }
+
+  return sum;
+}
+
+/* Scalar reference for work: sequentially accumulate a[i] + b[i] over
+   i in [0, n) and return the total.  */
+double work_ref( double *a, double *b, int n )
+{
+  double sum = 0.0;
+  int k = 0;
+
+  while (k < n)
+    {
+      const double pair = a[k] + b[k];
+
+      sum += pair;
+      k++;
+    }
+
+  return sum;
+}
+
+/* Compare the SIMD reduction against the scalar reference.
+
+   The results are kept as double, matching the return type of work and
+   work_ref, instead of being truncated to int.  init fills the arrays
+   with small integer values, so every partial sum is exactly
+   representable and the exact comparison holds whatever order the
+   reduction uses.  */
+int main ()
+{
+  double a[N], a_ref[N], b[N];
+  double res, ref;
+
+  init(a, a_ref, b, N);
+
+  res = work(a, b, N);
+  ref = work_ref(a_ref, b, N);
+
+  if (res != ref)
+    abort ();
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define N 128
+#define M 16
+#define EPS 0.0000000000000001
+#define SAFELEN 16
+
+#include <stdlib.h>
+
+/* Fill a and b with identical data: i*i with alternating sign
+   (negative for even i).  */
+void init(double *a, double *b, int n)
+{
+  int idx, sign;
+
+  for (idx = 0, sign = -1; idx < n; idx++, sign = -sign)
+    {
+      const double signed_square = idx * idx * sign;
+
+      a[idx] = signed_square;
+      b[idx] = signed_square;
+    }
+}
+
+/* For i in [m, n), set b[i] = b[i - m] - 1 inside an "omp simd" loop
+   whose safelen clause is SAFELEN.  */
+void work( double *b, int n, int m )
+{
+  int k;
+
+  #pragma omp simd safelen(SAFELEN)
+  for (k = m; k < n; k++)
+    {
+      b[k] = b[k - m] - 1.0f;
+    }
+}
+
+/* Scalar reference for work: b[i] = b[i - m] - 1 for i in [m, n),
+   evaluated strictly in order.  */
+void work_ref( double *b, int n, int m )
+{
+  int k = m;
+
+  while (k < n)
+    {
+      b[k] = b[k - m] - 1.0f;
+      k++;
+    }
+}
+
+/* Abort if any of the first N elements of a and b differ by more than
+   EPS in either direction.  */
+void check (double *a, double *b)
+{
+  int k;
+
+  for (k = 0; k < N; k++)
+    {
+      const double fwd = a[k] - b[k];
+      const double rev = b[k] - a[k];
+
+      if (fwd > EPS || rev > EPS)
+        abort ();
+    }
+}
+
+/* Run the safelen SIMD kernel and the scalar reference on identically
+   initialised arrays and compare the results.
+
+   The reference array must go through work_ref; running work on both
+   arrays would compare the SIMD kernel against itself and could never
+   detect a miscompiled loop.  */
+int main ()
+{
+  double b[N], b_ref[N];
+
+  init(b, b_ref, N);
+
+  work(b, N, M);
+  work_ref(b_ref, N, M);
+
+  check(b, b_ref);
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define N 128
+#define M 16
+#define EPS 0.0000000000000001
+#define SAFELEN 16
+
+#include <stdlib.h>
+
+/* Fill the leading n x n corner of a with i*j*s and of b with i+j+s,
+   where s starts at -1 and flips sign after every element.  */
+void init(double a[N][N], double b[N][N], int n)
+{
+  int row, col, sign = -1;
+
+  for (row = 0; row < n; row++)
+    for (col = 0; col < n; col++, sign = -sign)
+      {
+        a[row][col] = row * col * sign;
+        b[row][col] = row + col + sign;
+      }
+}
+
+/* Element-wise sum c = a + b over the leading n x n corner, written as a
+   perfectly nested loop pair under "omp for simd collapse(2)".  */
+void work( double a[N][N], double b[N][N], double c[N][N], int n )
+{
+  int row, col;
+  double tmp;
+
+  #pragma omp for simd collapse(2) private(tmp)
+  for (row = 0; row < n; row++)
+    for (col = 0; col < n; col++)
+      {
+        tmp = a[row][col] + b[row][col];
+        c[row][col] = tmp;
+      }
+}
+
+/* Scalar reference for work: c = a + b over the leading n x n corner
+   with no OpenMP directive.  */
+void work_ref( double a[N][N], double b[N][N], double c[N][N], int n )
+{
+  int row, col;
+
+  for (row = 0; row < n; row++)
+    for (col = 0; col < n; col++)
+      {
+        const double total = a[row][col] + b[row][col];
+
+        c[row][col] = total;
+      }
+}
+
+/* Abort if any element of the N x N matrices a and b differs by more
+   than EPS in either direction.  */
+void check (double a[N][N], double b[N][N])
+{
+  int row, col;
+
+  for (row = 0; row < N; row++)
+    {
+      for (col = 0; col < N; col++)
+        {
+          const double fwd = a[row][col] - b[row][col];
+          const double rev = b[row][col] - a[row][col];
+
+          if (fwd > EPS || rev > EPS)
+            abort ();
+        }
+    }
+}
+
+/* Compute c = a + b with the collapsed SIMD loop and with the scalar
+   reference, then compare the two result matrices.  */
+int main ()
+{
+  double lhs[N][N], rhs[N][N], out[N][N], out_ref[N][N];
+
+  init(lhs, rhs, N);
+
+  work(lhs, rhs, out, N);
+  work_ref(lhs, rhs, out_ref, N);
+
+  check(out, out_ref);
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run { target vect_simd_clones } } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define N 100
+#define EPS 0.000001
+
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Initialise the first n elements: b[i] = i*i with alternating sign
+   (negative for even i) and y[i] = b[i] * 0.1f.  The loop is bounded by
+   the n argument (not the N macro) so the caller decides how many
+   elements are written.  */
+void init(int *b, float *y, int n)
+{
+  int i, s = -1;
+
+  for (i = 0; i < n; i++)
+    {
+      b[i] = i*i*s;
+      y[i] = b[i] * 0.1f;
+      s = -s;
+    }
+}
+
+/* Add 10 to *p in place and return the updated value.  Declared
+   SIMD-callable with p linear (step 1) and notinbranch.  */
+#pragma omp declare simd linear(p:1) notinbranch
+int foo(int *p)
+{
+  *p += 10;
+  return *p;
+}
+
+/* SIMD loop storing foo(&b[i]) into a[i] for i in [0, n); returns the
+   last element written, a[n-1].  */
+int myaddint(int *a, int *b, int n)
+{
+#pragma omp simd
+  for (int k = 0; k < n; k++)
+    {
+      /* foo is called unconditionally on every iteration.  */
+      a[k] = foo(&b[k]);
+    }
+
+  return a[n-1];
+}
+
+/* Scalar reference for myaddint: a[i] = foo(&b[i]) for i in [0, n);
+   returns a[n-1].  */
+int myaddint_ref(int *a, int *b, int n)
+{
+  int k = 0;
+
+  while (k < n)
+    {
+      a[k] = foo(&b[k]);
+      k++;
+    }
+
+  return a[n-1];
+}
+
+/* Add 18.5 to *p in place and return the updated value.  Declared
+   SIMD-callable with p linear (step 1) and inbranch.  */
+#pragma omp declare simd linear(p:1) inbranch
+float goo(float *p)
+{
+  *p += 18.5f;
+  return *p;
+}
+
+/* SIMD loop: where x[i] > y[i], x[i] takes goo(&y[i]) (which also
+   updates y[i]); otherwise x[i] takes y[i].  Returns x[n-1] converted
+   to int.  */
+int myaddfloat(float *x, float *y, int n)
+{
+#pragma omp simd
+  for (int k = 0; k < n; k++)
+    {
+      /* goo is only reached when the comparison holds, i.e. it is
+         called within a branch.  */
+      x[k] = (x[k] > y[k]) ? goo(&y[k]) : y[k];
+    }
+
+  return x[n-1];
+}
+
+/* Scalar reference for myaddfloat: the same conditional update of x and
+   y without the simd directive.  Returns x[n-1] converted to int.  */
+int myaddfloat_ref(float *x, float *y, int n)
+{
+  for (int k = 0; k < n; k++)
+    {
+      if (x[k] > y[k])
+        x[k] = goo(&y[k]);
+      else
+        x[k] = y[k];
+    }
+
+  return x[n-1];
+}
+
+/* Abort if any of the first N elements of a and b differ.  */
+void check_addint (int *a, int *b)
+{
+  int k = 0;
+
+  while (k < N)
+    {
+      if (a[k] != b[k])
+        abort ();
+      k++;
+    }
+}
+
+/* Abort if any of the first N elements of a and b differ by more than
+   EPS in either direction.  */
+void check_addfloat (float *a, float *b)
+{
+  int k = 0;
+
+  while (k < N)
+    {
+      if (a[k] - b[k] > EPS)
+        abort ();
+      if (b[k] - a[k] > EPS)
+        abort ();
+      k++;
+    }
+}
+
+/* Run the SIMD versions, then the scalar references, each on freshly
+   initialised inputs, and compare the int and float results.  b and y
+   are re-initialised before the reference pass because foo and goo
+   modify them in place.  */
+int main ()
+{
+  int a[N], a_ref[N], b[N];
+  float x[N], x_ref[N], y[N];
+
+  init(a, x, N);
+  init(b, y, N);
+  myaddint(a, b, N);
+  myaddfloat(x, y, N);
+
+  init(a_ref, x_ref, N);
+  init(b, y, N);
+  myaddint_ref(a_ref, b, N);
+  myaddfloat_ref(x_ref, y, N);
+
+  check_addint(a, a_ref);
+  check_addfloat(x, x_ref);
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run { target vect_simd_clones } } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define N 45
+int a[N], a_ref[N], b[N];
+
+/* Recursive Fibonacci-style sequence: returns n itself for n <= 2,
+   otherwise fib(n-1) + fib(n-2).  Declared SIMD-callable, inbranch.  */
+#pragma omp declare simd inbranch
+int fib( int n )
+{
+  return (n <= 2) ? n : fib(n-1) + fib(n-2);
+}
+
+/* Fill b with 0..N-1, evaluate fib over it once in an "omp simd" loop
+   (into a) and once in a plain loop (into a_ref), then abort on the
+   first mismatch.  */
+int main(void)
+{
+  int k;
+
+#pragma omp simd
+  for (k = 0; k < N; k++)
+    {
+      b[k] = k;
+    }
+
+#pragma omp simd
+  for (k = 0; k < N; k++)
+    {
+      a[k] = fib(b[k]);
+    }
+
+  for (k = 0; k < N; k++)
+    {
+      a_ref[k] = fib(b[k]);
+    }
+
+  for (k = 0; k < N; k++)
+    {
+      if (a[k] != a_ref[k])
+        abort ();
+    }
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include <stdlib.h>
+#include <math.h>
+
+int P[1000];
+float A[1000];
+
+/* SIMD loop over i in [0, 999) that updates the global A through the
+   index table P and returns the lastprivate value of pri, i.e. the one
+   computed by the final iteration.  */
+float do_work(float *arr)
+{
+  float pri;
+
+#pragma omp simd lastprivate(pri)
+  for (int iter = 0; iter < 999; ++iter)
+    {
+      const int slot = P[iter];
+
+      /* Even slots combine the neighbouring A entry with arr; odd slots
+         fall back to the constant 0.5.  */
+      pri = (slot % 2 == 0) ? A[slot+1] + arr[iter] : 0.5f;
+
+      A[slot] = pri * 1.5f;
+      pri = pri + A[slot];
+    }
+
+  return pri;
+}
+
+/* Initialise P, A and a local input array, run do_work and abort unless
+   it returns the expected value 8237.25.  */
+int main(void)
+{
+  float result, input[1000];
+
+  for (int k = 0; k < 1000; ++k)
+    {
+      P[k] = k;
+      A[k] = k * 1.5f;
+      input[k] = k * 1.8f;
+    }
+
+  result = do_work(input);
+
+  if (result != 8237.25)
+    abort ();
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 100000
+
+/* Fill the N-element inputs: a1 alternates -1, 1, -1, ... and a2 holds
+   the index values 0 .. N-1.  */
+void init (int *a1, int *a2)
+{
+  int k, sign;
+
+  for (k = 0, sign = -1; k < N; k++, sign = -sign)
+    {
+      a1[k] = sign;
+      a2[k] = k;
+    }
+}
+
+/* Abort if any of the first N elements of a and b differ.  */
+void check (int *a, int *b)
+{
+  int k = 0;
+
+  while (k < N)
+    {
+      if (a[k] != b[k])
+        abort ();
+      k++;
+    }
+}
+
+/* Host reference: initialise local v1 and v2, then store their
+   element-wise product in p[0..N).  */
+void vec_mult_ref (int *p)
+{
+  int v1[N], v2[N];
+  int k = 0;
+
+  init (v1, v2);
+
+  while (k < N)
+    {
+      p[k] = v1[k] * v2[k];
+      k++;
+    }
+}
+
+/* Offloaded version: initialise local v1 and v2 on the host, then
+   compute p[i] = v1[i] * v2[i] in a parallel loop inside a target
+   region whose map clause names p[0:N].  */
+void vec_mult (int *p)
+{
+  int k;
+  int v1[N], v2[N];
+
+  init (v1, v2);
+
+  #pragma omp target map(p[0:N])
+  #pragma omp parallel for
+  for (k = 0; k < N; k++)
+    {
+      p[k] = v1[k] * v2[k];
+    }
+}
+
+/* Compute the product with the host reference and with the target
+   version, then compare.  vec_mult_ref and vec_mult build their own
+   input vectors, so main only owns the two result arrays.  */
+int main ()
+{
+  int p1[N], p2[N];
+
+  vec_mult_ref (p1);
+  vec_mult (p2);
+
+  check (p1, p2);
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 100000
+
+void init (char *a1, char *a2)
+{
+ char s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void check (char *a, char *b)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void vec_mult_ref (char *p)
+{
+ int i;
+ char v1[N], v2[N];
+
+ init (v1, v2);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (char *p)
+{
+ int i;
+ char v1[N], v2[N];
+
+ init (v1, v2);
+
+ #pragma omp target map(from: p[0:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+/* Run the host reference and the offloaded multiplication and require
+   identical results.  */
+int main ()
+{
+  /* Only the result vectors are needed here: vec_mult_ref and vec_mult
+     each build their own inputs with init, so no input arrays (and no
+     init call) belong in main.  */
+  char p1[N], p2[N];
+
+  vec_mult_ref (p1);
+  vec_mult (p2);
+
+  check (p1, p2);
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 100000
+
+void init (long long *a1, long long *a2)
+{
+ long long s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void check (long long *a, long long *b)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void vec_mult_ref (long long *p)
+{
+ int i;
+ long long v1[N], v2[N];
+
+ init (v1, v2);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (long long *p)
+{
+ int i;
+ long long v1[N], v2[N];
+
+ init (v1, v2);
+
+ #pragma omp target map(v1, v2, p[0:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+/* Run the host reference and the offloaded multiplication and require
+   identical results.  */
+int main ()
+{
+  /* Only the result vectors are needed here: vec_mult_ref and vec_mult
+     each build their own inputs with init, so no input arrays (and no
+     init call) belong in main.  Dropping them also removes two unused
+     N-element long long arrays from this frame.  */
+  long long p1[N], p2[N];
+
+  vec_mult_ref (p1);
+  vec_mult (p2);
+
+  check (p1, p2);
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.000001
+#define N 100000
+
+void init (double *a1, double *a2)
+{
+ double s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+/* Abort if any of the first N element pairs differ by more than EPS
+   in either direction.  */
+void check (double *a, double *b)
+{
+  int k;
+
+  for (k = 0; k < N; k++)
+    {
+      /* Within tolerance on both sides: nothing to report.  */
+      if (!(a[k] - b[k] > EPS) && !(b[k] - a[k] > EPS))
+        continue;
+
+      abort ();
+    }
+}
+
+void vec_mult_ref (double *p, double *v1, double *v2)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (double *p, double *v1, double *v2)
+{
+ int i;
+ #pragma omp target map(to: v1[0:N], v2[:N]) map(from: p[0:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+int main ()
+{
+ double p1[N], p2[N];
+ double v1[N], v2[N];
+
+ init (v1, v2);
+
+ vec_mult_ref (p1, v1, v2);
+ vec_mult (p2, v1, v2);
+
+ check (p1, p2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#define EPS 0.000001
+#define N 100000
+#define THRESHOLD1 10000
+#define THRESHOLD2 1000
+
+void init (float *a1, float *a2)
+{
+ float s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void check (float *a, float *b)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
+ abort ();
+}
+
+void vec_mult_ref (float *p, float *v1, float *v2)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+/* Store v1[i] * v2[i] in p[i] for i in [0, N) inside a target region.
+   The region is offloaded only when N > THRESHOLD1, and the inner loop
+   runs in parallel only when N > THRESHOLD2.  */
+void vec_mult (float *p, float *v1, float *v2)
+{
+ int i;
+ #pragma omp target if(N > THRESHOLD1) map(to: v1[0:N], v2[:N]) \
+ map(from: p[0:N])
+ {
+ /* Fail the test if the region ended up executing on the host.  */
+ if (omp_is_initial_device ())
+ abort ();
+
+ #pragma omp parallel for if(N > THRESHOLD2)
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+ }
+}
+
+int main ()
+{
+ float p1[N], p2[N];
+ float v1[N], v2[N];
+
+ init (v1, v2);
+
+ vec_mult_ref (p1, v1, v2);
+ vec_mult (p2, v1, v2);
+
+ check (p1, p2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+const int MAX = 1800;
+
+void check (long long *a, long long *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void init (long long *a1, long long *a2, int N)
+{
+ long long s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void vec_mult_ref (long long *p, long long *v1, long long *v2, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (long long *p, long long *v1, long long *v2, int N)
+{
+ int i;
+ #pragma omp target data map(to: v1[0:N], v2[:N]) map(from: p[0:N])
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+int main ()
+{
+ long long *p1 = (long long *) malloc (MAX * sizeof (long long));
+ long long *p2 = (long long *) malloc (MAX * sizeof (long long));
+ long long *v1 = (long long *) malloc (MAX * sizeof (long long));
+ long long *v2 = (long long *) malloc (MAX * sizeof (long long));
+
+ init (v1, v2, MAX);
+
+ vec_mult_ref (p1, v1, v2, MAX);
+ vec_mult (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+const int MAX = 1800;
+
+void check (char *a, char *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void init (char *a1, char *a2, int N)
+{
+ char s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void init_again (char *a1, char *a2, int N)
+{
+ char s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s * 10;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void vec_mult_ref (char *p, char *v1, char *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ init_again (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+}
+
+/* Offloaded counterpart of vec_mult_ref: p[i] = v1[i] * v2[i] with the
+   values from init, then p[i] += v1[i] * v2[i] with the values from
+   init_again.  p is mapped once by the enclosing target data region;
+   v1 and v2 are mapped "to" by each target region separately.  */
+void vec_mult (char *p, char *v1, char *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ #pragma omp target data map(from: p[0:N])
+ {
+ /* First pass: uses the inputs produced by init.  */
+ #pragma omp target map(to: v1[:N], v2[:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ /* Refill the host copies between the two target regions.  */
+ init_again (v1, v2, N);
+
+ /* Second pass: the map(to:) clauses name the refilled inputs.  */
+ #pragma omp target map(to: v1[:N], v2[:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+ }
+}
+
+int main ()
+{
+ char *p1 = (char *) malloc (MAX * sizeof (char));
+ char *p2 = (char *) malloc (MAX * sizeof (char));
+ char *v1 = (char *) malloc (MAX * sizeof (char));
+ char *v2 = (char *) malloc (MAX * sizeof (char));
+
+ vec_mult_ref (p1, v1, v2, MAX);
+ vec_mult (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+const int ROWS = 5;
+const int COLS = 5;
+
+void init (int Q[][COLS], const int rows, const int cols)
+{
+ int i, j;
+ for (i = 0; i < rows; i++)
+ for (j = 0; j < cols; j++)
+ Q[i][j] = (i + 1) * 100 + (j + 1);
+}
+
+void check (int a[][COLS], int b[][COLS], const int rows, const int cols)
+{
+ int i, j;
+ for (i = 0; i < rows; i++)
+ for (j = 0; j < cols; j++)
+ if (a[i][j] != b[i][j])
+ abort ();
+}
+
+/* Host reference: for every column, scale each entry by the sum of the
+   squares of that column's entries.  */
+void gramSchmidt_ref (int Q[][COLS], const int rows, const int cols)
+{
+  int col, row;
+
+  for (col = 0; col < cols; col++)
+    {
+      int sum_sq = 0;
+
+      /* Sum of squares of the current column.  */
+      for (row = 0; row < rows; row++)
+        sum_sq += Q[row][col] * Q[row][col];
+
+      /* Scale the column by that sum.  */
+      for (row = 0; row < rows; row++)
+        Q[row][col] = Q[row][col] * sum_sq;
+    }
+}
+
+/* Offloaded counterpart of gramSchmidt_ref.  Q (rows x cols) and COLS
+   are mapped once by the target data construct around the column loop;
+   each column is then handled by two target regions.  */
+void gramSchmidt (int Q[][COLS], const int rows, const int cols)
+{
+ int i, k;
+
+ #pragma omp target data map(Q[0:rows][0:cols]) map(to:COLS)
+ for (k = 0; k < cols; k++)
+ {
+ int tmp = 0;
+
+ /* Region 1: reduce the squares of column k into tmp.  */
+ #pragma omp target
+ #pragma omp parallel for reduction(+:tmp)
+ for (i = 0; i < rows; i++)
+ tmp += (Q[i][k] * Q[i][k]);
+
+ /* Region 2: scale column k by the value computed in region 1.  */
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < rows; i++)
+ Q[i][k] *= tmp;
+ }
+}
+
+int main ()
+{
+ int (*Q1)[COLS] = (int(*)[COLS]) malloc (ROWS * COLS * sizeof (int));
+ int (*Q2)[COLS] = (int(*)[COLS]) malloc (ROWS * COLS * sizeof (int));
+
+ init (Q1, ROWS, COLS);
+ init (Q2, ROWS, COLS);
+
+ gramSchmidt_ref (Q1, ROWS, COLS);
+ gramSchmidt (Q2, ROWS, COLS);
+
+ check (Q1, Q2, ROWS, COLS);
+
+ free (Q1);
+ free (Q2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.000001
+
+const int MAX = 1800;
+
+void check (double *a, double *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
+ abort ();
+}
+
+void init (double *a1, double *a2, int N)
+{
+ double s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void vec_mult_ref (double *p1, double *v3, double *v4, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p1[i] = v3[i] * v4[i];
+}
+
+void foo_ref (double *p0, double *v1, double *v2, int N)
+{
+ init (v1, v2, N);
+ vec_mult_ref (p0, v1, v2, N);
+}
+
+void vec_mult (double *p1, double *v3, double *v4, int N)
+{
+ int i;
+ #pragma omp target map(to: v3[0:N], v4[:N]) map(from: p1[0:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p1[i] = v3[i] * v4[i];
+}
+
+/* Initialise v1/v2 and call vec_mult from inside a target data region
+   that maps the same three arrays vec_mult names in its own target
+   construct.  */
+void foo (double *p0, double *v1, double *v2, int N)
+{
+ init (v1, v2, N);
+
+ /* The data region covers only the single call statement below.  */
+ #pragma omp target data map(to: v1[0:N], v2[:N]) map(from: p0[0:N])
+ vec_mult (p0, v1, v2, N);
+}
+
+int main ()
+{
+ double *p1 = (double *) malloc (MAX * sizeof (double));
+ double *p2 = (double *) malloc (MAX * sizeof (double));
+ double *v1 = (double *) malloc (MAX * sizeof (double));
+ double *v2 = (double *) malloc (MAX * sizeof (double));
+
+ foo_ref (p1, v1, v2, MAX);
+ foo (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <stdlib.h>
+#include <omp.h>
+
+#define EPS 0.000001
+#define THRESHOLD 1000
+
+const int MAX = 1800;
+
+void check (float *a, float *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
+ abort ();
+}
+
+void init (float *a1, float *a2, int N)
+{
+ float s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void init_again (float *a1, float *a2, int N)
+{
+ float s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s * 10;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void vec_mult_ref (float *p, float *v1, float *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ init_again (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+}
+
+/* Offloaded counterpart of vec_mult_ref: p[i] = v1[i] * v2[i] with the
+   values from init, then p[i] += v1[i] * v2[i] with the values from
+   init_again.  Both the data region and the two target regions are
+   enabled only when N > THRESHOLD; each target region aborts if it
+   finds itself executing on the host.  */
+void vec_mult (float *p, float *v1, float *v2, int N)
+{
+  int i;
+
+  init (v1, v2, N);
+
+  #pragma omp target data if(N > THRESHOLD) map(from: p[0:N])
+  {
+    #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
+    {
+      /* abort must actually be called here: a bare `abort;' is an
+         expression statement with no effect and silently disables the
+         host-fallback check.  */
+      if (omp_is_initial_device ())
+        abort ();
+
+      #pragma omp parallel for
+      for (i = 0; i < N; i++)
+        p[i] = v1[i] * v2[i];
+    }
+
+    /* Refill the host inputs between the two target regions.  */
+    init_again (v1, v2, N);
+
+    #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
+    {
+      if (omp_is_initial_device ())
+        abort ();
+
+      #pragma omp parallel for
+      for (i = 0; i < N; i++)
+        p[i] = p[i] + (v1[i] * v2[i]);
+    }
+  }
+}
+
+int main ()
+{
+ float *p1 = (float *) malloc (MAX * sizeof (float));
+ float *p2 = (float *) malloc (MAX * sizeof (float));
+ float *v1 = (float *) malloc (MAX * sizeof (float));
+ float *v2 = (float *) malloc (MAX * sizeof (float));
+
+ vec_mult_ref (p1, v1, v2, MAX);
+ vec_mult (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <stdlib.h>
+#include <omp.h>
+
+#define THRESHOLD 1000
+
+const int MAX = 1800;
+
+void check (short *a, short *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void init (short *a1, short *a2, int N)
+{
+ short s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void vec_mult_ref (short *p, short *v1, short *v2, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+/* Store v1[i] * v2[i] in p[i] for i in [0, N).  p is mapped by an
+   unconditional target data region; the target region itself is
+   offloaded only when N > THRESHOLD and aborts if it runs on the
+   host.  */
+void vec_mult (short *p, short *v1, short *v2, int N)
+{
+ int i;
+ #pragma omp target data map(from: p[0:N])
+ #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
+ {
+ if (omp_is_initial_device ())
+ abort ();
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+ }
+}
+
+int main ()
+{
+ short *p1 = (short *) malloc (MAX * sizeof (short));
+ short *p2 = (short *) malloc (MAX * sizeof (short));
+ short *v1 = (short *) malloc (MAX * sizeof (short));
+ short *v2 = (short *) malloc (MAX * sizeof (short));
+
+ init (v1, v2, MAX);
+
+ vec_mult_ref (p1, v1, v2, MAX);
+ vec_mult (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+const int MAX = 1800;
+
+void check (int *a, int *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void init (int *a1, int *a2, int N)
+{
+ int i, s = -1;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void init_again (int *a1, int *a2, int N)
+{
+ int i, s = -1;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s * 10;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+/* Host reference: p[i] = v1[i] * v2[i] using the values written by
+   init, plus v1[i] * v2[i] using the values written by init_again.
+   p, v1 and v2 must each hold at least N ints.  */
+void vec_mult_ref (int *p, int *v1, int *v2, int N)
+{
+  int i;
+
+  /* Initialise exactly the N elements this call works on.  Using the
+     file-level MAX here would write past the end of the buffers for
+     any caller that passes N < MAX.  */
+  init (v1, v2, N);
+
+  for (i = 0; i < N; i++)
+    p[i] = v1[i] * v2[i];
+
+  init_again (v1, v2, N);
+
+  for (i = 0; i < N; i++)
+    p[i] = p[i] + (v1[i] * v2[i]);
+}
+
+/* Offloaded counterpart of vec_mult_ref.  All three arrays are mapped
+   once by the target data region; after init_again rewrites the host
+   copies of v1 and v2, a target update pushes the new values to the
+   device before the second target region runs.
+   p, v1 and v2 must each hold at least N ints.  */
+void vec_mult (int *p, int *v1, int *v2, int N)
+{
+  int i;
+
+  /* Initialise exactly the N elements that are mapped below.  Using
+     the file-level MAX here would write past the end of the buffers
+     for any caller that passes N < MAX.  */
+  init (v1, v2, N);
+
+  #pragma omp target data map(to: v1[:N], v2[:N]) map(from: p[0:N])
+  {
+    #pragma omp target
+    #pragma omp parallel for
+    for (i = 0; i < N; i++)
+      p[i] = v1[i] * v2[i];
+
+    init_again (v1, v2, N);
+
+    /* Refresh the device copies with the values just written on the
+       host.  */
+    #pragma omp target update to(v1[:N], v2[:N])
+
+    #pragma omp target
+    #pragma omp parallel for
+    for (i = 0; i < N; i++)
+      p[i] = p[i] + (v1[i] * v2[i]);
+  }
+}
+
+int main ()
+{
+ int *p1 = (int *) malloc (MAX * sizeof (int));
+ int *p2 = (int *) malloc (MAX * sizeof (int));
+ int *v1 = (int *) malloc (MAX * sizeof (int));
+ int *v2 = (int *) malloc (MAX * sizeof (int));
+
+ vec_mult_ref (p1, v1, v2, MAX);
+ vec_mult (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+const int MAX = 1800;
+
+void check (int *a, int *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void init (int *a1, int *a2, int N)
+{
+ int i, s = -1;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+/* Overwrite a[0 .. N-1] with the element index and report that the
+   array changed by returning 1 (always).  */
+int maybe_init_again (int *a, int N)
+{
+  int k = N;
+
+  /* Walk backwards; the stored values do not depend on the order.  */
+  while (k > 0)
+    {
+      k--;
+      a[k] = k;
+    }
+
+  return 1;
+}
+
+void vec_mult_ref (int *p, int *v1, int *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ maybe_init_again (v1, N);
+ maybe_init_again (v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+}
+
+/* Offloaded counterpart of vec_mult_ref.  All three arrays are mapped
+   once by the target data region.  Between the two target regions the
+   host copies of v1 and v2 are rewritten by maybe_init_again, and each
+   is pushed to the device with a target update guarded by the value
+   that call returned.  */
+void vec_mult (int *p, int *v1, int *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ #pragma omp target data map(to: v1[:N], v2[:N]) map(from: p[0:N])
+ {
+ int changed;
+
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ /* The update runs only when `changed' is non-zero.  */
+ changed = maybe_init_again (v1, N);
+ #pragma omp target update if (changed) to(v1[:N])
+
+ changed = maybe_init_again (v2, N);
+ #pragma omp target update if (changed) to(v2[:N])
+
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+ }
+}
+
+int main ()
+{
+ int *p = (int *) malloc (MAX * sizeof (int));
+ int *p1 = (int *) malloc (MAX * sizeof (int));
+ int *v1 = (int *) malloc (MAX * sizeof (int));
+ int *v2 = (int *) malloc (MAX * sizeof (int));
+
+ vec_mult_ref (p, v1, v2, MAX);
+ vec_mult (p1, v1, v2, MAX);
+
+ check (p, p1, MAX);
+
+ free (p);
+ free (p1);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+/* Flow dependence between sibling tasks: the first task writes x
+   (depend out), the second reads it (depend in) and aborts unless it
+   sees the written value 2.  */
+int main()
+{
+ int x = 1;
+ #pragma omp parallel
+ #pragma omp single
+ {
+ /* Writer task.  */
+ #pragma omp task shared(x) depend(out: x)
+ x = 2;
+ /* Reader task: depends on the writer through x.  */
+ #pragma omp task shared(x) depend(in: x)
+ if (x != 2)
+ abort ();
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+/* Anti-dependence between sibling tasks: the first task reads x
+   (depend in) and aborts unless it still sees the initial value 1; the
+   second task writes x (depend out).  */
+int main()
+{
+ int x = 1;
+ #pragma omp parallel
+ #pragma omp single
+ {
+ /* Reader task.  */
+ #pragma omp task shared(x) depend(in: x)
+ if (x != 1)
+ abort ();
+ /* Writer task: depends on the reader through x.  */
+ #pragma omp task shared(x) depend(out: x)
+ x = 2;
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+/* Output dependence between sibling tasks: both tasks write x with
+   depend(out: x), so the second task is ordered after the first and
+   the value observed after the taskwait must be the second task's.  */
+int main()
+{
+  int x = 0;
+  #pragma omp parallel
+  #pragma omp single
+  {
+    #pragma omp task shared(x) depend(out: x)
+    x = 1;
+    #pragma omp task shared(x) depend(out: x)
+    x = 2;
+    #pragma omp taskwait
+    /* Only 2 is acceptable: accepting 1 as well would let a runtime
+       that ignores the out->out dependence pass the test.  */
+    if (x != 2)
+      abort ();
+  }
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+/* One writer followed by two readers: the first task writes x
+   (depend out); each of the two following tasks reads x (depend in)
+   and aborts unless it sees the written value 2.  */
+int main()
+{
+ int x = 1;
+ #pragma omp parallel
+ #pragma omp single
+ {
+ /* Writer task.  */
+ #pragma omp task shared(x) depend(out: x)
+ x = 2;
+ /* First reader.  */
+ #pragma omp task shared(x) depend(in: x)
+ if (x != 2)
+ abort ();
+ /* Second reader; like the first, it only names x as an input.  */
+ #pragma omp task shared(x) depend(in: x)
+ if (x != 2)
+ abort ();
+ }
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#define N 128
+#define BS 16
+#define EPS 0.000001
+
+#include <stdlib.h>
+
+/* Blocked matrix multiply C += A * B on N x N matrices, using BS x BS
+   tiles.  One task is created per (i, j, k) tile triple; the depend
+   clauses name the A and B tiles as inputs and the C tile as inout, so
+   tasks that update the same C tile are ordered with respect to each
+   other.  */
+void matmul_depend (float A[N][N], float B[N][N], float C[N][N])
+{
+ int i, j, k, ii, jj, kk;
+ for (i = 0; i < N; i+=BS)
+ for (j = 0; j < N; j+=BS)
+ for (k = 0; k < N; k+=BS)
+// Note 1: i, j, k, A, B, C are firstprivate by default
+// Note 2: A, B and C are just pointers
+#pragma omp task private(ii, jj, kk) \
+ depend ( in: A[i:BS][k:BS], B[k:BS][j:BS] ) \
+ depend ( inout: C[i:BS][j:BS] )
+ /* Task body: multiply one A tile by one B tile into the C tile.  */
+ for (ii = i; ii < i+BS; ii++ )
+ for (jj = j; jj < j+BS; jj++ )
+ for (kk = k; kk < k+BS; kk++ )
+ C[ii][jj] = C[ii][jj] + A[ii][kk] * B[kk][jj];
+}
+
+/* Sequential reference: accumulate the N x N product A * B into C.  */
+void matmul_ref (float A[N][N], float B[N][N], float C[N][N])
+{
+  int row, col, inner;
+
+  for (row = 0; row < N; row++)
+    {
+      for (col = 0; col < N; col++)
+        {
+          for (inner = 0; inner < N; inner++)
+            C[row][col] = C[row][col] + A[row][inner] * B[inner][col];
+        }
+    }
+}
+
+void init (float A[N][N], float B[N][N])
+{
+ int i, j, s = -1;
+ for (i = 0; i < N; i++)
+ for (j = 0; j < N; j++)
+ {
+ A[i][j] = i * j * s;
+ B[i][j] = i + j;
+ s = -s;
+ }
+}
+
+/* Set every element of the two N x N matrices A and B to zero.  */
+void init_zero (float A[N][N], float B[N][N])
+{
+  int i, j;
+
+  for (i = 0; i < N; i++)
+    for (j = 0; j < N; j++)
+      {
+        A[i][j] = 0;
+        B[i][j] = 0;
+      }
+}
+
+/* Abort if any pair of corresponding elements of the two N x N
+   matrices differs by more than EPS in either direction.  */
+void check (float A[N][N], float B[N][N])
+{
+  int row, col;
+
+  for (row = 0; row < N; row++)
+    {
+      for (col = 0; col < N; col++)
+        {
+          /* Within tolerance on both sides: move on.  */
+          if (!(A[row][col] - B[row][col] > EPS)
+              && !(B[row][col] - A[row][col] > EPS))
+            continue;
+
+          abort ();
+        }
+    }
+}
+
+int main ()
+{
+ float A[N][N], B[N][N], C[N][N], C_ref[N][N];
+
+ init (A, B);
+ init_zero (C, C_ref);
+
+ matmul_depend (A, B, C);
+ matmul_ref (A, B, C_ref);
+
+ check (C, C_ref);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.0001
+#define N 1024*1024
+
+void init (float B[], float C[], int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ B[i] = 0.1 * i;
+ C[i] = 0.01 * i * i;
+ }
+}
+
+float dotprod_ref (float B[], float C[], int n)
+{
+ int i;
+ float sum = 0.0;
+
+ for (i = 0; i < n; i++)
+ sum += B[i] * C[i];
+
+ return sum;
+}
+
+/* Dot product of B[0..n-1] and C[0..n-1] computed in a target region.
+   The index range is cut into chunks of block_size elements that are
+   distributed over the teams; within each chunk a parallel loop
+   reduces into sum, and the teams construct reduces across teams.
+   num_teams and block_threads bound the league size and the threads
+   per team.  */
+float dotprod (float B[], float C[], int n, int block_size,
+               int num_teams, int block_threads)
+{
+  int i, i0;
+  float sum = 0;
+
+  /* sum is mapped tofrom explicitly: from OpenMP 4.5 on, a scalar that
+     is not mapped is firstprivate on the target construct, and the
+     reduced value would never reach the host.  */
+  #pragma omp target map(to: B[0:n], C[0:n]) map(tofrom: sum)
+  #pragma omp teams num_teams(num_teams) thread_limit(block_threads) \
+                    reduction(+:sum)
+  #pragma omp distribute
+  for (i0 = 0; i0 < n; i0 += block_size)
+    #pragma omp parallel for reduction(+:sum)
+    for (i = i0; i < ((i0 + block_size > n) ? n : i0 + block_size); i++)
+      sum += B[i] * C[i];
+
+  return sum;
+}
+
+/* Abort if a deviates from b by more than EPS, measured relative to b
+   (or as the plain value of a when b is zero).  */
+void check (float a, float b)
+{
+  float err;
+
+  if (b == 0.0)
+    err = a;
+  else
+    err = (a - b) / b;
+
+  /* |err| > EPS, written out for both signs.  */
+  if (err > EPS || -err > EPS)
+    abort ();
+}
+
+int main ()
+{
+ float *v1 = (float *) malloc (N * sizeof (float));
+ float *v2 = (float *) malloc (N * sizeof (float));
+
+ float p1, p2;
+
+ init (v1, v2, N);
+
+ p1 = dotprod_ref (v1, v2, N);
+ p2 = dotprod (v1, v2, N, N / 8, 2, 8);
+
+ check (p1, p2);
+
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.0001
+#define N 1024*1024
+
+void init (float B[], float C[], int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ B[i] = 0.1 * i;
+ C[i] = 0.01 * i * i;
+ }
+}
+
+float dotprod_ref (float B[], float C[], int n)
+{
+ int i;
+ float sum = 0.0;
+
+ for (i = 0; i < n; i++)
+ sum += B[i] * C[i];
+
+ return sum;
+}
+
+/* Dot product of B[0..n-1] and C[0..n-1] computed with a combined
+   target teams construct and a distribute parallel for loop.  */
+float dotprod (float B[], float C[], int n)
+{
+  int i;
+  float sum = 0;
+
+  /* Two things are needed for a correct result on a real device:
+     - map(tofrom: sum), because from OpenMP 4.5 on an unmapped scalar
+       is firstprivate on target and the result would be dropped;
+     - reduction(+:sum) on the teams level, because the reduction on
+       the inner loop only combines the threads of one team; without
+       it all teams would update the same sum concurrently.  */
+  #pragma omp target teams map(to: B[0:n], C[0:n]) \
+                           map(tofrom: sum) reduction(+:sum)
+  #pragma omp distribute parallel for reduction(+:sum)
+  for (i = 0; i < n; i++)
+    sum += B[i] * C[i];
+
+  return sum;
+}
+
+void check (float a, float b)
+{
+ float err = (b == 0.0) ? a : (a - b) / b;
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+}
+
+int main ()
+{
+ float *v1 = (float *) malloc (N * sizeof (float));
+ float *v2 = (float *) malloc (N * sizeof (float));
+
+ float p1, p2;
+
+ init (v1, v2, N);
+
+ p1 = dotprod_ref (v1, v2, N);
+ p2 = dotprod (v1, v2, N);
+
+ check (p1, p2);
+
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.0001
+#define N 1024*1024
+
+void init (float B[], float C[], int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ B[i] = 0.1 * i;
+ C[i] = 0.01 * i * i;
+ }
+}
+
+float dotprod_ref (float B[], float C[], int n)
+{
+ int i;
+ float sum = 0.0;
+
+ for (i = 0; i < n; i++)
+ sum += B[i] * C[i];
+
+ return sum;
+}
+
+/* Dot product of B[0..n-1] and C[0..n-1] computed in a target region
+   with 8 teams of at most 16 threads; iterations are handed to teams
+   in chunks of 1024 and to threads in chunks of 64.  */
+float dotprod (float B[], float C[], int n)
+{
+  int i;
+  float sum = 0;
+
+  /* map(tofrom: sum) brings the result back to the host (an unmapped
+     scalar is firstprivate on target from OpenMP 4.5 on), and the
+     reduction on teams combines the per-team partial sums instead of
+     letting the teams race on one shared variable.  */
+  #pragma omp target map(to: B[0:n], C[0:n]) map(tofrom: sum)
+  #pragma omp teams num_teams(8) thread_limit(16) reduction(+:sum)
+  #pragma omp distribute parallel for reduction(+:sum) \
+              dist_schedule(static, 1024) \
+              schedule(static, 64)
+  for (i = 0; i < n; i++)
+    sum += B[i] * C[i];
+
+  return sum;
+}
+
+void check (float a, float b)
+{
+ float err = (b == 0.0) ? a : (a - b) / b;
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+}
+
+int main ()
+{
+ float *v1 = (float *) malloc (N * sizeof (float));
+ float *v2 = (float *) malloc (N * sizeof (float));
+
+ float p1, p2;
+
+ init (v1, v2, N);
+
+ p1 = dotprod_ref (v1, v2, N);
+ p2 = dotprod (v1, v2, N);
+
+ check (p1, p2);
+
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 10000
+
+void init (float *a, float *b, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i] = 0.1 * i;
+ b[i] = 0.01 * i * i;
+ }
+}
+
+void vec_mult_ref (float *p, float *v1, float *v2, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+/* Store v1[i] * v2[i] in p[i] for i in [0, n) inside a target teams
+   region; the loop is distributed across teams and marked simd.
+   v1 and v2 are mapped "to", p is mapped "from".  */
+void vec_mult (float *p, float *v1, float *v2, int n)
+{
+ int i;
+ #pragma omp target teams map(to: v1[0:n], v2[:n]) map(from: p[0:n])
+ #pragma omp distribute simd
+ for (i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void check (float *a, float *b, int n)
+{
+ int i;
+ for (i = 0 ; i < n ; i++)
+ {
+ float err = (a[i] == 0.0) ? b[i] : (b[i] - a[i]) / a[i];
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+ }
+}
+
+int main ()
+{
+ float *p1 = (float *) malloc (N * sizeof (float));
+ float *p2 = (float *) malloc (N * sizeof (float));
+ float *v1 = (float *) malloc (N * sizeof (float));
+ float *v2 = (float *) malloc (N * sizeof (float));
+
+ init (v1, v2, N);
+
+ vec_mult_ref (p1, v1, v2, N);
+ vec_mult (p2, v1, v2, N);
+
+ check (p1, p2, N);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 10000
+
+void init (float *a, float *b, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i] = 0.1 * i;
+ b[i] = 0.01 * i * i;
+ }
+}
+
+void vec_mult_ref (float *p, float *v1, float *v2, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (float *p, float *v1, float *v2, int n)
+{
+ int i;
+ #pragma omp target teams map(to: v1[0:n], v2[:n]) map(from: p[0:n])
+ #pragma omp distribute parallel for simd
+ for (i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void check (float *a, float *b, int n)
+{
+ int i;
+ for (i = 0 ; i < n ; i++)
+ {
+ float err = (a[i] == 0.0) ? b[i] : (b[i] - a[i]) / a[i];
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+ }
+}
+
+int main ()
+{
+ float *p1 = (float *) malloc (N * sizeof (float));
+ float *p2 = (float *) malloc (N * sizeof (float));
+ float *v1 = (float *) malloc (N * sizeof (float));
+ float *v2 = (float *) malloc (N * sizeof (float));
+
+ init (v1, v2, N);
+
+ vec_mult_ref (p1, v1, v2, N);
+ vec_mult (p2, v1, v2, N);
+
+ check (p1, p2, N);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+! { dg-do run }
+
+ ! Main program: run the single test routine.
+ call foo ()
+contains
+ ! Map A(1:4) in a target data region, then map the disjoint section
+ ! p(8:27) through a pointer to the same array together with A(1:4)
+ ! again, write one element of each section on the device, and check
+ ! both values on the host.
+ subroutine foo ()
+ integer, target :: A(30)
+ integer, pointer :: p(:)
+ !$omp target data map(A(1:4))
+ p => A
+ !$omp target map(p(8:27)) map(A(1:4))
+ ! A(3) lies in A(1:4); p(9) lies in p(8:27) and aliases A(9).
+ A(3) = 777
+ p(9) = 777
+ !$omp end target
+ !$omp end target data
+ if (A(3) /= 777 .or. A(9) /= 777) call abort
+ end subroutine
+end
--- /dev/null
+! { dg-do run }
+
+ ! Main program: run the single test routine.
+ call foo ()
+contains
+ ! Map A(1:10) in a target data region, then map p(4:10) (a section of
+ ! the same array reached through a pointer, fully inside A(1:10))
+ ! together with A(1:10) again, and write through both names.
+ subroutine foo ()
+ integer, target :: A(30)
+ integer, pointer :: p(:)
+ !$omp target data map(A(1:10))
+ p => A
+ !$omp target map(p(4:10)) map(A(1:10))
+ A(3) = 777
+ ! p(9) and A(9) name the same element; the later store (999) is the
+ ! value the host check expects.
+ p(9) = 777
+ A(9) = 999
+ !$omp end target
+ !$omp end target data
+ if (A(3) /= 777 .or. A(9) /= 999) call abort
+ end subroutine
+end
--- /dev/null
+! { dg-do run }
+
+module e_55_1_mod
+ integer, parameter :: N = 100000, CHUNKSZ = 10000
+ real :: Y(N), Z(N)
+end module
+
+subroutine init ()
+ use e_55_1_mod, only : Y, Z, N
+ integer :: i
+ do i = 1, N
+ Y(i) = 0.1 * i
+ Z(i) = Y(i)
+ end do
+end subroutine
+
+subroutine check ()
+ use e_55_1_mod, only : Y, Z, N
+ real :: err
+ real, parameter :: EPS = 0.00001
+ integer :: i
+ do i = 1, N
+ if (Y(i) == 0.0) then
+ err = Z(i)
+ else if (Z(i) == 0.0) then
+ err = Y(i)
+ else
+ err = (Y(i) - Z(i)) / Z(i)
+ end if
+ if (err > EPS .or. err < -EPS) call abort
+ end do
+end subroutine
+
+real function F (z)
+ !$omp declare target
+ real, intent(in) :: z
+ F = -z
+end function
+
+! Apply F to every element of the module array Z, one chunk of CHUNKSZ
+! elements per explicit task.  Each task offloads its chunk with a
+! target region that maps only that chunk.
+subroutine pipedF ()
+  use e_55_1_mod, only: Z, N, CHUNKSZ
+  integer :: C, i
+  real :: F
+  do C = 1, N, CHUNKSZ
+    !$omp task
+      !$omp target map(Z(C:C+CHUNKSZ-1))
+        !$omp parallel do
+          do i = C, C+CHUNKSZ-1
+            Z(i) = F (Z(i))
+          end do
+      !$omp end target
+    !$omp end task
+  end do
+  ! Explicit tasks may be deferred; wait for all of them so that Z is
+  ! complete when the caller goes on to compare it with the reference.
+  !$omp taskwait
+end subroutine
+
+subroutine pipedF_ref ()
+ use e_55_1_mod, only: Y, N
+ integer :: i
+ real :: F
+ do i = 1, N
+ Y(i) = F (Y(i))
+ end do
+end subroutine
+
+program e_55_1
+ call init ()
+ call pipedF ()
+ call pipedF_ref ()
+ call check ()
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+subroutine init (v1, v2, N)
+ !$omp declare target
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+end subroutine
+
+subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+end subroutine
+
+! Compute p(i) = v1(i) * v2(i) for i = 1..N using two explicit tasks
+! that each contain a target region, then verify p with check.
+! N has no explicit declaration in this routine, so it takes the
+! default implicit integer type.
+subroutine vec_mult (p, N)
+ use omp_lib, only: omp_is_initial_device
+ real :: p(N)
+ real, allocatable :: v1(:), v2(:)
+ integer :: i
+ !$omp declare target (init)
+ !$omp target data map(to: v1, v2, N) map(from: p)
+ ! Task 1 (depend out on v1, v2): allocate and initialise the two
+ ! input vectors inside a target region.
+ !$omp task shared(v1, v2, p) depend(out: v1, v2)
+ !$omp target map(to: v1, v2, N)
+ if (omp_is_initial_device ()) call abort
+ allocate (v1(N), v2(N))
+ call init (v1, v2, N)
+ !$omp end target
+ !$omp end task
+ ! Task 2 (depend in on v1, v2): multiply the vectors into p and
+ ! release them again, also inside a target region.
+ !$omp task shared(v1, v2, p) depend(in: v1, v2)
+ !$omp target map(to: v1, v2, N) map(from: p)
+ if (omp_is_initial_device ()) call abort
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ deallocate (v1, v2)
+ !$omp end target
+ !$omp end task
+ !$omp end target data
+
+ ! NOTE(review): the taskwait comes after the end of the target data
+ ! region - confirm this ordering is intended.
+ !$omp taskwait
+ call check (p, N)
+end subroutine
+
+program e_55_2
+ integer, parameter :: N = 1000
+ real :: p(N)
+ call vec_mult (p, N)
+end program
--- /dev/null
+! { dg-do run }
+
+! Recursive Fibonacci marked "declare target", plus a wrapper that
+! calls it from a target region.
+module e_53_1_mod
+ ! fib_wrapper offloads only for arguments greater than this value.
+ integer :: THRESHOLD = 20
+contains
+ ! fib(n): 0 for n <= 0, 1 for n == 1, otherwise fib(n-1) + fib(n-2).
+ integer recursive function fib (n) result (f)
+ !$omp declare target
+ integer :: n
+ if (n <= 0) then
+ f = 0
+ else if (n == 1) then
+ f = 1
+ else
+ f = fib (n - 1) + fib (n - 2)
+ end if
+ end function
+
+ ! Evaluate fib(n) inside a target region; the if clause enables
+ ! offloading only when n > THRESHOLD.  n carries no explicit
+ ! declaration here and is therefore implicitly typed.
+ integer function fib_wrapper (n)
+ integer :: x
+ !$omp target map(to: n) if(n > THRESHOLD)
+ x = fib (n)
+ !$omp end target
+ fib_wrapper = x
+ end function
+end module
+
+program e_53_1
+ use e_53_1_mod, only : fib, fib_wrapper
+ if (fib (15) /= fib_wrapper (15)) call abort
+ if (fib (25) /= fib_wrapper (25)) call abort
+end program
--- /dev/null
+! { dg-do run }
+
+program e_53_2
+ !$omp declare target (fib)
+ integer :: x, fib
+ !$omp target
+ x = fib (25)
+ !$omp end target
+ if (x /= fib (25)) call abort
+end program
+
+integer recursive function fib (n) result (f)
+ !$omp declare target
+ integer :: n
+ if (n <= 0) then
+ f = 0
+ else if (n == 1) then
+ f = 1
+ else
+ f = fib (n - 1) + fib (n - 2)
+ end if
+end function
--- /dev/null
+! { dg-do run }
+
+module e_53_3_mod
+ !$omp declare target (N, p, v1, v2)
+ integer, parameter :: N = 1000
+ real :: p(N), v1(N), v2(N)
+end module
+
+subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+end subroutine
+
+subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+end subroutine
+
+subroutine vec_mult ()
+ use e_53_3_mod
+ integer :: i
+ call init (v1, v2, N);
+ !$omp target update to(v1, v2)
+ !$omp target
+ !$omp parallel do
+ do i = 1,N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp target update from (p)
+ call check (p, N)
+end subroutine
+
+program e_53_3
+ call vec_mult ()
+end program
--- /dev/null
+! { dg-do run }
+
+module e_53_4_mod
+ !$omp declare target (N, Q)
+ integer, parameter :: N = 10
+ real :: Q(N,N)
+contains
+ real function Pfun (i, k)
+ !$omp declare target
+ integer, intent(in) :: i, k
+ Pfun = (Q(i,k) * Q(k,i))
+ end function
+end module
+
+real function accum (k) result (tmp)
+ use e_53_4_mod
+ integer :: i, k
+ tmp = 0.0e0
+ !$omp target
+ !$omp parallel do reduction(+:tmp)
+ do i = 1, N
+ tmp = tmp + Pfun (k, i)
+ end do
+ !$omp end target
+end function
+
+real function accum_ref (k) result (tmp)
+ use e_53_4_mod
+ integer :: i, k
+ tmp = 0.0e0
+ do i = 1, N
+ tmp = tmp + Pfun (k, i)
+ end do
+end function
+
+subroutine init ()
+ use e_53_4_mod
+ integer :: i, j
+ do i = 1, N
+ do j = 1, N
+ Q(i,j) = 0.001 * i * j
+ end do
+ end do
+end subroutine
+
+! Abort when a and b disagree by more than EPS.  The deviation is the
+! relative error (a - b) / b when both values are non-zero; when one
+! of them is zero, the other value itself is used as the deviation.
+subroutine check (a, b)
+  real :: a, b, dev
+  real, parameter :: EPS = 0.00001
+  if (b /= 0.0 .and. a /= 0.0) then
+    dev = (a - b) / b
+  else if (b == 0.0) then
+    dev = a
+  else
+    dev = b
+  end if
+  if (abs (dev) > EPS) call abort
+end subroutine
+
+program e_53_4
+ use e_53_4_mod
+ integer :: i
+ real :: accum, accum_ref
+ call init ()
+ !$omp target update to(Q)
+ do i = 1, N
+ call check (accum (i), accum_ref (i))
+ end do
+end program
--- /dev/null
+! { dg-do run { target vect_simd_clones } }
+! { dg-options "-O2" }
+! { dg-additional-options "-msse2" { target sse2_runtime } }
+! { dg-additional-options "-mavx" { target avx_runtime } }
+
+module e_53_5_mod
+ !$omp declare target (N, Q)
+ integer, parameter :: N = 10000, M = 1024
+ real :: Q(N,N)
+contains
+ real function Pfun (k, i)
+ !$omp declare simd(Pfun) uniform(i) linear(k) notinbranch
+ !$omp declare target
+ integer, value, intent(in) :: i, k
+ Pfun = (Q(k,i) * Q(i,k))
+ end function
+end module
+
+! Sum Pfun(k, i) over k = 1..M and i = 1..N inside a target region.
+! The outer loop over i is a parallel do with a reduction into tmp;
+! the inner loop over k is a simd loop reducing into the per-iteration
+! partial sum tmp1.
+real function accum () result (tmp)
+  use e_53_5_mod
+  implicit none
+  real :: tmp1
+  ! k is declared explicitly instead of relying on implicit typing.
+  integer :: i, k
+  tmp = 0.0e0
+  !$omp target
+    !$omp parallel do private(tmp1) reduction(+:tmp)
+    do i = 1, N
+      tmp1 = 0.0e0
+      !$omp simd reduction(+:tmp1)
+      do k = 1, M
+        tmp1 = tmp1 + Pfun (k, i)
+      end do
+      tmp = tmp + tmp1
+    end do
+  !$omp end target
+end function
+
+! Sequential reference for accum: sum Pfun(k, i) over k = 1..M and
+! i = 1..N, with the same loop nesting and the same per-i partial sum.
+real function accum_ref () result (tmp)
+  use e_53_5_mod
+  implicit none
+  real :: tmp1
+  ! k is declared explicitly instead of relying on implicit typing.
+  integer :: i, k
+  tmp = 0.0e0
+  do i = 1, N
+    tmp1 = 0.0e0
+    do k = 1, M
+      tmp1 = tmp1 + Pfun (k, i)
+    end do
+    tmp = tmp + tmp1
+  end do
+end function
+
+! Fill the module array Q so that Q(i,j) = 0.001 * i * j for all
+! 1 <= i, j <= N (N and Q come from e_53_5_mod).
+subroutine init ()
+  use e_53_5_mod
+  integer :: i, j
+  ! Fortran stores arrays column-major, so the first subscript must vary
+  ! fastest: j is the outer loop and i the inner one.  With N = 10000 the
+  ! original i-outer order strode through memory on every store.  Each
+  ! element gets exactly the same value as before.
+  do j = 1, N
+    do i = 1, N
+      Q(i,j) = 0.001 * i * j
+    end do
+  end do
+end subroutine
+
+! Abort the program unless a and b agree.
+! When both values are non-zero the measure is the relative difference
+! (a - b) / b; when b is zero the measure is a itself, and when only a is
+! zero the measure is b.  The run is aborted if the magnitude of that
+! measure exceeds EPS.
+subroutine check (a, b)
+  real :: a, b
+  real, parameter :: EPS = 0.00001
+  real :: delta
+
+  if (a /= 0.0 .and. b /= 0.0) then
+    delta = (a - b) / b
+  else if (b == 0.0) then
+    delta = a
+  else
+    delta = b
+  end if
+
+  if (abs (delta) > EPS) call abort
+end subroutine
+
+! Driver: initialise Q on the host, push it to the device copy, then
+! compare the offloaded sum with the host reference.
+program e_53_5
+  use e_53_5_mod
+  ! External function result types (the unused local "d" was dropped).
+  real :: accum, accum_ref
+  call init ()
+  ! Q is a declare-target variable; refresh its device copy after init.
+  !$omp target update to(Q)
+  call check (accum (), accum_ref ())
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+! Exercise the if clause on "target" and "target data".
+! The same pair of constructs is run three times with a = 100, 300 and
+! 500, so the condition (a > 200 .and. a < 400) is false, true, false.
+!   c - omp_is_initial_device () inside the conditional target region
+!   d - omp_is_initial_device () inside the unconditional target region
+!   b - set to 100 inside the unconditional target region
+! Every variable written in a target region is listed in a map(from:)
+! clause: starting with OpenMP 4.5 an unmapped scalar is firstprivate on
+! a target construct and its new value would never reach the host.
+program e_57_1
+  use omp_lib, only: omp_is_initial_device
+  integer :: a, b
+  logical :: c, d
+
+  ! --- Round 1: a = 100, condition false -------------------------------
+  a = 100
+  b = 0
+
+  !$omp target if(a > 200 .and. a < 400) map(from: c)
+    c = omp_is_initial_device ()
+  !$omp end target
+
+  !$omp target data map(to: b) if(a > 200 .and. a < 400)
+    !$omp target map(from: b, d)
+      b = 100
+      d = omp_is_initial_device ()
+    !$omp end target
+  !$omp end target data
+
+  ! The conditional target ran on the host (c true).  The target data
+  ! region was inactive, so the inner target owns the mapping of b and
+  ! copies 100 back.
+  if (b /= 100 .or. .not. c .or. d) call abort
+
+  ! --- Round 2: a = 300, condition true --------------------------------
+  a = a + 200
+  b = 0
+
+  !$omp target if(a > 200 .and. a < 400) map(from: c)
+    c = omp_is_initial_device ()
+  !$omp end target
+
+  !$omp target data map(to: b) if(a > 200 .and. a < 400)
+    !$omp target map(from: b, d)
+      b = 100
+      d = omp_is_initial_device ()
+    !$omp end target
+  !$omp end target data
+
+  ! The conditional target was offloaded (c false).  b is already present
+  ! through the enclosing map(to: b), so the inner map(from: b) performs
+  ! no copy-back and the host value stays 0.
+  if (b /= 0 .or. c .or. d) call abort
+
+  ! --- Round 3: a = 500, condition false again -------------------------
+  a = a + 200
+  b = 0
+
+  !$omp target if(a > 200 .and. a < 400) map(from: c)
+    c = omp_is_initial_device ()
+  !$omp end target
+
+  !$omp target data map(to: b) if(a > 200 .and. a < 400)
+    !$omp target map(from: b, d)
+      b = 100
+      d = omp_is_initial_device ()
+    !$omp end target
+  !$omp end target data
+
+  ! Same expectations as round 1.
+  if (b /= 100 .or. .not. c .or. d) call abort
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+! Exercise the device clause: iteration i launches a target region on
+! device number i-1 and records omp_is_initial_device () in offload(i).
+! Entries for existing devices (i <= num) are expected to be .false.;
+! the remaining entries are expected to be .true.
+program e_57_2
+  use omp_lib, only: omp_is_initial_device, omp_get_num_devices
+  integer, parameter :: N = 10
+  integer :: i, num
+  logical :: offload(N)
+  num = omp_get_num_devices ()
+  !$omp parallel do
+  do i = 1, N
+    !$omp target device(i-1) map(from: offload(i:i))
+      offload(i) = omp_is_initial_device ()
+    !$omp end target
+  end do
+
+  ! Only N entries exist: clamp the bound so a machine reporting more
+  ! than N devices does not read past the end of offload.
+  do i = 1, min (num, N)
+    if (offload(i)) call abort
+  end do
+
+  ! Empty when num >= N.
+  do i = num+1, N
+    if (.not. offload(i)) call abort
+  end do
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+! Exercise omp_set_default_device: a target region is run once with the
+! initial default device and once after the default device has been set
+! to omp_get_num_devices (); res records omp_is_initial_device () each
+! time.  The first run must report .false., the second .true.
+program e_57_3
+  use omp_lib, only: omp_is_initial_device, omp_get_num_devices,&
+    omp_get_default_device, omp_set_default_device
+  logical :: res
+  integer :: default_device
+
+  ! The value is recorded but not otherwise used in this program.
+  default_device = omp_get_default_device ()
+  ! res is mapped explicitly: starting with OpenMP 4.5 an unmapped scalar
+  ! is firstprivate on a target construct and would not be copied back.
+  !$omp target map(from: res)
+    res = omp_is_initial_device ()
+  !$omp end target
+  if (res) call abort
+
+  call omp_set_default_device (omp_get_num_devices ())
+  !$omp target map(from: res)
+    res = omp_is_initial_device ()
+  !$omp end target
+  if (.not. res) call abort
+end program
+++ /dev/null
-! { dg-do run }
-
-module e_50_1_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (N)
- integer :: i, N
- real :: p(N), v1(N), v2(N)
- call init (v1, v2, N)
- !$omp target
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- call check (p, N)
- end subroutine
-
-end module
-
-program e_50_1
- use e_50_1_mod, only : vec_mult
- integer :: n
- n = 1000
- call vec_mult (n)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_50_2_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (N)
- integer :: i, N
- real :: p(N), v1(N), v2(N)
- call init (v1, v2, N)
- !$omp target map(v1,v2,p)
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- call check (p, N)
- end subroutine
-end module
-
-program e_50_2
- use e_50_2_mod, only : vec_mult
- integer :: n
- n = 1000
- call vec_mult (n)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_50_3_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (N)
- integer :: i, N
- real :: p(N), v1(N), v2(N)
- call init (v1, v2, N)
- !$omp target map(to: v1,v2) map(from: p)
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- call check (p, N)
- end subroutine
-end module
-
-program e_50_3
- use e_50_3_mod, only : vec_mult
- integer :: n
- n = 1000
- call vec_mult (n)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_50_4_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real, pointer, dimension(:) :: v1, v2
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real, pointer, dimension(:) :: p
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult_1 (p, v1, v2, N)
- integer :: i, N
- real, pointer, dimension(:) :: p, v1, v2
- !$omp target map(to: v1(1:N), v2(:N)) map(from: p(1:N))
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- end subroutine
-
- subroutine vec_mult_2 (p, v1, v2, N)
- real, dimension(*) :: p, v1, v2
- integer :: i, N
- !$omp target map(to: v1(1:N), v2(:N)) map(from: p(1:N))
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- end subroutine
-end module
-
-program e_50_4
- use e_50_4_mod, only : init, check, vec_mult_1, vec_mult_2
- real, pointer, dimension(:) :: p1, p2, v1, v2
- integer :: n
- n = 1000
- allocate (p1(n), p2(n), v1(n), v2(n))
- call init (v1, v2, n)
- call vec_mult_1 (p1, v1, v2, n)
- call vec_mult_2 (p2, v1, v2, n)
- call check (p1, N)
- call check (p2, N)
- deallocate (p1, p2, v1, v2)
-end program
+++ /dev/null
-! { dg-do run }
-! { dg-require-effective-target offload_device }
-
-module e_50_5_mod
-integer, parameter :: THRESHOLD1 = 500, THRESHOLD2 = 100
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (N)
- use omp_lib, only: omp_is_initial_device
- integer :: i, N
- real :: p(N), v1(N), v2(N)
- call init (v1, v2, N)
- !$omp target if(N > THRESHOLD1) map(to: v1,v2) map(from: p)
- if (omp_is_initial_device ()) call abort
- !$omp parallel do if(N > THRESHOLD2)
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- call check (p, N)
- end subroutine
-end module
-
-program e_50_5
- use e_50_5_mod, only : vec_mult
- integer :: n
- n = 1000
- call vec_mult (n)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_51_1_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (N)
- real :: p(N), v1(N), v2(N)
- integer :: i, N
- call init (v1, v2, N)
- !$omp target data map(to: v1, v2) map(from: p)
- !$omp target
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- !$omp end target data
- call check (p, N)
- end subroutine
-end module
-
-program e_51_1
- use e_51_1_mod, only : vec_mult
- integer :: n
- n = 1000
- call vec_mult (n)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_51_2_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine init_again (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i - 3.0
- v2(i) = i + 2.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - 2 * (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (N)
- real :: p(N), v1(N), v2(N)
- integer :: i, N
- call init (v1, v2, N)
- !$omp target data map(from: p)
- !$omp target map(to: v1, v2 )
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- call init_again (v1, v2, N)
- !$omp target map(to: v1, v2 )
- !$omp parallel do
- do i = 1, N
- p(i) = p(i) + v1(i) * v2(i)
- end do
- !$omp end target
- !$omp end target data
- call check (p, N)
- end subroutine
-end module
-
-program e_51_2
- use e_51_2_mod, only : vec_mult
- integer :: n
- n = 1000
- call vec_mult (n)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_51_3_mod
-contains
- subroutine init (Q, rows, cols)
- integer :: i, k, rows, cols
- double precision :: Q(rows,cols)
- do k = 1, cols
- do i = 1, rows
- Q(i,k) = 10 * i + k
- end do
- end do
- end subroutine
-
- subroutine check (P, Q, rows, cols)
- integer :: i, k, rows, cols
- double precision, parameter :: EPS = 0.00001
- double precision :: P(rows,cols), Q(rows,cols), diff
- do k = 1, cols
- do i = 1, rows
- diff = P(i,k) - Q(i,k)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end do
- end subroutine
-
- subroutine gramSchmidt_ref (Q, rows, cols)
- integer :: i, k, rows, cols
- double precision :: Q(rows,cols), tmp
- do k = 1, cols
- tmp = 0.0d0
- do i = 1, rows
- tmp = tmp + (Q(i,k) * Q(i,k))
- end do
- tmp = 1.0d0 / sqrt (tmp)
- do i = 1, rows
- Q(i,k) = Q(i,k) * tmp
- end do
- end do
- end subroutine
-
- subroutine gramSchmidt (Q, rows, cols)
- integer :: i, k, rows, cols
- double precision :: Q(rows,cols), tmp
- !$omp target data map(Q)
- do k = 1, cols
- tmp = 0.0d0
- !$omp target
- !$omp parallel do reduction(+:tmp)
- do i = 1, rows
- tmp = tmp + (Q(i,k) * Q(i,k))
- end do
- !$omp end target
- tmp = 1.0d0 / sqrt (tmp)
- !$omp target
- !$omp parallel do
- do i = 1, rows
- Q(i,k) = Q(i,k) * tmp
- end do
- !$omp end target
- end do
- !$omp end target data
- end subroutine
-end module
-
-program e_51_3
- use e_51_3_mod, only : init, check, gramSchmidt, gramSchmidt_ref
- integer :: cols, rows
- double precision, pointer :: P(:,:), Q(:,:)
- cols = 5
- rows = 5
- allocate (P(rows,cols), Q(rows,cols))
- call init (P, rows, cols)
- call init (Q, rows, cols)
- call gramSchmidt_ref (P, rows, cols)
- call gramSchmidt (Q, rows, cols)
- call check (P, Q, rows, cols)
- deallocate (P, Q)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_51_4_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine foo (p, v1, v2, N)
- real, pointer, dimension(:) :: p, v1, v2
- integer :: N
- call init (v1, v2, N)
- !$omp target data map(to: v1, v2) map(from: p)
- call vec_mult (p, v1, v2, N)
- !$omp end target data
- call check (p, N)
- end subroutine
-
- subroutine vec_mult (p, v1, v2, N)
- real, pointer, dimension(:) :: p, v1, v2
- integer :: i, N
- !$omp target map(to: v1, v2) map(from: p)
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- end subroutine
-end module
-
-program e_51_4
- use e_51_4_mod, only : foo
- integer :: n
- real, pointer, dimension(:) :: p, v1, v2
- n = 1000
- allocate (p(n), v1(n), v2(n))
- call foo (p, v1, v2, n)
- deallocate (p, v1, v2)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_51_5_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine foo (p, v1, v2, N)
- real, dimension(:) :: p, v1, v2
- integer :: N
- call init (v1, v2, N)
- !$omp target data map(to: v1, v2, N) map(from: p)
- call vec_mult (p, v1, v2, N)
- !$omp end target data
- call check (p, N)
- end subroutine
-
- subroutine vec_mult (p, v1, v2, N)
- real, dimension(:) :: p, v1, v2
- integer :: i, N
- !$omp target map(to: v1, v2, N) map(from: p)
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- end subroutine
-end module
-
-program e_51_5
- use e_51_5_mod, only : foo
- integer, parameter :: N = 1024
- real, allocatable, dimension(:) :: p, v1, v2
- allocate(p(N), v1(N), v2(N))
- call foo (p, v1, v2, N)
- deallocate (p, v1, v2)
-end program
+++ /dev/null
-! { dg-do run }
-! { dg-require-effective-target offload_device }
-
-module e_51_6_mod
-integer, parameter :: THRESHOLD = 500
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine init_again (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i - 3.0
- v2(i) = i + 2.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - 2 * (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (N)
- use omp_lib, only: omp_is_initial_device
- real :: p(N), v1(N), v2(N)
- integer :: i, N
- call init (v1, v2, N)
- !$omp target data if(N > THRESHOLD) map(from: p)
- !$omp target if(N > THRESHOLD) map(to: v1, v2)
- if (omp_is_initial_device ()) call abort
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- call init_again (v1, v2, N)
- !$omp target if(N > THRESHOLD) map(to: v1, v2)
- if (omp_is_initial_device ()) call abort
- !$omp parallel do
- do i = 1, N
- p(i) = p(i) + v1(i) * v2(i)
- end do
- !$omp end target
- !$omp end target data
- call check (p, N)
- end subroutine
-end module
-
-program e_51_6
- use e_51_6_mod, only : vec_mult
- integer :: n
- n = 1000
- call vec_mult (n)
-end program
+++ /dev/null
-! { dg-do run }
-! { dg-require-effective-target offload_device }
-
-module e_51_7_mod
-integer, parameter :: THRESHOLD = 500
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (N)
- use omp_lib, only: omp_is_initial_device
- real :: p(N), v1(N), v2(N)
- integer :: i, N
- call init (v1, v2, N)
- !$omp target data if(N > THRESHOLD) map(to: v1, v2) map(from: p)
- !$omp target
- if (omp_is_initial_device ()) call abort
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- !$omp end target data
- call check (p, N)
- end subroutine
-end module
-
-program e_51_7
- use e_51_7_mod, only : vec_mult
- integer :: n
- n = 1000
- call vec_mult (n)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_52_1_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine init_again (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i - 3.0
- v2(i) = i + 2.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - 2 * (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (p, v1, v2, N)
- real :: p(N), v1(N), v2(N)
- integer :: i, N
- call init (v1, v2, N)
- !$omp target data map(to: v1, v2) map(from: p)
- !$omp target
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- call init_again (v1, v2, N)
- !$omp target update to(v1, v2)
- !$omp target
- !$omp parallel do
- do i = 1, N
- p(i) = p(i) + v1(i) * v2(i)
- end do
- !$omp end target
- !$omp end target data
- call check (p, N)
- end subroutine
-end module
-
-program e_52_1
- use e_52_1_mod, only : vec_mult
- integer :: n
- real, pointer :: p(:), v1(:), v2(:)
- n = 1000
- allocate (p(n), v1(n), v2(n))
- call vec_mult (p, v1, v2, n)
- deallocate (p, v1, v2)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_52_2_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine init_again (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i - 3.0
- v2(i) = i + 2.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i * i + (i + 2.0) * (i - 3.0))
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- logical function maybe_init_again (v, N)
- real :: v(N)
- integer :: i, N
- do i = 1, N
- v(i) = i
- end do
- maybe_init_again = .true.
- end function
-
- subroutine vec_mult (p, v1, v2, N)
- real :: p(N), v1(N), v2(N)
- integer :: i, N
- logical :: changed
- call init (v1, v2, N)
- !$omp target data map(to: v1, v2) map(from: p)
- !$omp target
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- changed = maybe_init_again (v1, N)
- !$omp target update if(changed) to(v1(:N))
- changed = maybe_init_again (v2, N)
- !$omp target update if(changed) to(v2(:N))
- !$omp target
- !$omp parallel do
- do i = 1, N
- p(i) = p(i) + v1(i) * v2(i)
- end do
- !$omp end target
- !$omp end target data
- call check (p, N)
- end subroutine
-end module
-
-program e_52_2
- use e_52_2_mod, only : vec_mult
- integer :: n
- real, pointer :: p(:), v1(:), v2(:)
- n = 1000
- allocate (p(n), v1(n), v2(n))
- call vec_mult (p, v1, v2, n)
- deallocate (p, v1, v2)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_53_1_mod
- integer :: THRESHOLD = 20
-contains
- integer recursive function fib (n) result (f)
- !$omp declare target
- integer :: n
- if (n <= 0) then
- f = 0
- else if (n == 1) then
- f = 1
- else
- f = fib (n - 1) + fib (n - 2)
- end if
- end function
-
- integer function fib_wrapper (n)
- integer :: x
- !$omp target map(to: n) if(n > THRESHOLD)
- x = fib (n)
- !$omp end target
- fib_wrapper = x
- end function
-end module
-
-program e_53_1
- use e_53_1_mod, only : fib, fib_wrapper
- if (fib (15) /= fib_wrapper (15)) call abort
- if (fib (25) /= fib_wrapper (25)) call abort
-end program
+++ /dev/null
-! { dg-do run }
-
-program e_53_2
- !$omp declare target (fib)
- integer :: x, fib
- !$omp target
- x = fib (25)
- !$omp end target
- if (x /= fib (25)) call abort
-end program
-
-integer recursive function fib (n) result (f)
- !$omp declare target
- integer :: n
- if (n <= 0) then
- f = 0
- else if (n == 1) then
- f = 1
- else
- f = fib (n - 1) + fib (n - 2)
- end if
-end function
+++ /dev/null
-! { dg-do run }
-
-module e_53_3_mod
- !$omp declare target (N, p, v1, v2)
- integer, parameter :: N = 1000
- real :: p(N), v1(N), v2(N)
-end module
-
-subroutine init (v1, v2, N)
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
-end subroutine
-
-subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
-end subroutine
-
-subroutine vec_mult ()
- use e_53_3_mod
- integer :: i
- call init (v1, v2, N);
- !$omp target update to(v1, v2)
- !$omp target
- !$omp parallel do
- do i = 1,N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target
- !$omp target update from (p)
- call check (p, N)
-end subroutine
-
-program e_53_3
- call vec_mult ()
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_53_4_mod
- !$omp declare target (N, Q)
- integer, parameter :: N = 10
- real :: Q(N,N)
-contains
- real function Pfun (i, k)
- !$omp declare target
- integer, intent(in) :: i, k
- Pfun = (Q(i,k) * Q(k,i))
- end function
-end module
-
-real function accum (k) result (tmp)
- use e_53_4_mod
- integer :: i, k
- tmp = 0.0e0
- !$omp target
- !$omp parallel do reduction(+:tmp)
- do i = 1, N
- tmp = tmp + Pfun (k, i)
- end do
- !$omp end target
-end function
-
-real function accum_ref (k) result (tmp)
- use e_53_4_mod
- integer :: i, k
- tmp = 0.0e0
- do i = 1, N
- tmp = tmp + Pfun (k, i)
- end do
-end function
-
-subroutine init ()
- use e_53_4_mod
- integer :: i, j
- do i = 1, N
- do j = 1, N
- Q(i,j) = 0.001 * i * j
- end do
- end do
-end subroutine
-
-subroutine check (a, b)
- real :: a, b, err
- real, parameter :: EPS = 0.00001
- if (b == 0.0) then
- err = a
- else if (a == 0.0) then
- err = b
- else
- err = (a - b) / b
- end if
- if (err > EPS .or. err < -EPS) call abort
-end subroutine
-
-program e_53_4
- use e_53_4_mod
- integer :: i
- real :: accum, accum_ref
- call init ()
- !$omp target update to(Q)
- do i = 1, N
- call check (accum (i), accum_ref (i))
- end do
-end program
+++ /dev/null
-! { dg-do run { target vect_simd_clones } }
-! { dg-options "-O2" }
-! { dg-additional-options "-msse2" { target sse2_runtime } }
-! { dg-additional-options "-mavx" { target avx_runtime } }
-
-module e_53_5_mod
- !$omp declare target (N, Q)
- integer, parameter :: N = 10000, M = 1024
- real :: Q(N,N)
-contains
- real function Pfun (k, i)
- !$omp declare simd(Pfun) uniform(i) linear(k) notinbranch
- !$omp declare target
- integer, value, intent(in) :: i, k
- Pfun = (Q(k,i) * Q(i,k))
- end function
-end module
-
-real function accum () result (tmp)
- use e_53_5_mod
- real :: tmp1
- integer :: i
- tmp = 0.0e0
- !$omp target
- !$omp parallel do private(tmp1) reduction(+:tmp)
- do i = 1, N
- tmp1 = 0.0e0
- !$omp simd reduction(+:tmp1)
- do k = 1, M
- tmp1 = tmp1 + Pfun (k, i)
- end do
- tmp = tmp + tmp1
- end do
- !$omp end target
-end function
-
-real function accum_ref () result (tmp)
- use e_53_5_mod
- real :: tmp1
- integer :: i
- tmp = 0.0e0
- do i = 1, N
- tmp1 = 0.0e0
- do k = 1, M
- tmp1 = tmp1 + Pfun (k, i)
- end do
- tmp = tmp + tmp1
- end do
-end function
-
-subroutine init ()
- use e_53_5_mod
- integer :: i, j
- do i = 1, N
- do j = 1, N
- Q(i,j) = 0.001 * i * j
- end do
- end do
-end subroutine
-
-subroutine check (a, b)
- real :: a, b, err
- real, parameter :: EPS = 0.00001
- if (b == 0.0) then
- err = a
- else if (a == 0.0) then
- err = b
- else
- err = (a - b) / b
- end if
- if (err > EPS .or. err < -EPS) call abort
-end subroutine
-
-program e_53_5
- use e_53_5_mod
- real :: accum, accum_ref, d
- call init ()
- !$omp target update to(Q)
- call check (accum (), accum_ref ())
-end program
+++ /dev/null
-! { dg-do run }
-
-function dotprod_ref (B, C, N) result (sum)
- implicit none
- real :: B(N), C(N), sum
- integer :: N, i
- sum = 0.0e0
- do i = 1, N
- sum = sum + B(i) * C(i)
- end do
-end function
-
-function dotprod (B, C, N, block_size, num_teams, block_threads) result (sum)
- implicit none
- real :: B(N), C(N), sum
- integer :: N, block_size, num_teams, block_threads, i, i0
- sum = 0.0e0
- !$omp target map(to: B, C, block_size, num_teams, block_threads)
- !$omp teams num_teams(num_teams) thread_limit(block_threads) &
- !$omp& reduction(+:sum)
- !$omp distribute
- do i0 = 1, N, block_size
- !$omp parallel do reduction(+:sum)
- do i = i0, min (i0 + block_size - 1, N)
- sum = sum + B(i) * C(i)
- end do
- end do
- !$omp end teams
- !$omp end target
-end function
-
-subroutine init (B, C, N)
- real :: B(N), C(N)
- integer :: N, i
- do i = 1, N
- B(i) = 0.0001 * i
- C(i) = 0.000001 * i * i
- end do
-end subroutine
-
-subroutine check (a, b)
- real :: a, b, err
- real, parameter :: EPS = 0.0001
- if (b == 0.0) then
- err = a
- else if (a == 0.0) then
- err = b
- else
- err = (a - b) / b
- end if
- if (err > EPS .or. err < -EPS) call abort
-end subroutine
-
-program e_54_1
- integer :: n
- real :: ref, d
- real, pointer, dimension(:) :: B, C
- n = 1024 * 1024
- allocate (B(n), C(n))
- call init (B, C, n)
- ref = dotprod_ref (B, C, n)
- d = dotprod (B, C, n, n / 8, 2, 8)
- call check (ref, d)
- deallocate (B, C)
-end program
+++ /dev/null
-! { dg-do run }
-
-function dotprod_ref (B, C, N) result (sum)
- implicit none
- real :: B(N), C(N), sum
- integer :: N, i
- sum = 0.0e0
- do i = 1, N
- sum = sum + B(i) * C(i)
- end do
-end function
-
-function dotprod (B, C, N) result(sum)
- real :: B(N), C(N), sum
- integer :: N, i
- sum = 0.0e0
- !$omp target teams map(to: B, C)
- !$omp distribute parallel do reduction(+:sum)
- do i = 1, N
- sum = sum + B(i) * C(i)
- end do
- !$omp end target teams
-end function
-
-subroutine init (B, C, N)
- real :: B(N), C(N)
- integer :: N, i
- do i = 1, N
- B(i) = 0.0001 * i
- C(i) = 0.000001 * i * i
- end do
-end subroutine
-
-subroutine check (a, b)
- real :: a, b, err
- real, parameter :: EPS = 0.0001
- if (b == 0.0) then
- err = a
- else if (a == 0.0) then
- err = b
- else
- err = (a - b) / b
- end if
- if (err > EPS .or. err < -EPS) call abort
-end subroutine
-
-program e_54_3
- integer :: n
- real :: ref, d
- real, pointer, dimension(:) :: B, C
- n = 1024 * 1024
- allocate (B(n), C(n))
- call init (B, C, n)
- ref = dotprod_ref (B, C, n)
- d = dotprod (B, C, n)
- call check (ref, d)
- deallocate (B, C)
-end program
+++ /dev/null
-! { dg-do run }
-
-function dotprod_ref (B, C, N) result (sum)
- implicit none
- real :: B(N), C(N), sum
- integer :: N, i
- sum = 0.0e0
- do i = 1, N
- sum = sum + B(i) * C(i)
- end do
-end function
-
-function dotprod (B, C, n) result(sum)
- real :: B(N), C(N), sum
- integer :: N, i
- sum = 0.0e0
- !$omp target map(to: B, C)
- !$omp teams num_teams(8) thread_limit(16)
- !$omp distribute parallel do reduction(+:sum) &
- !$omp& dist_schedule(static, 1024) schedule(static, 64)
- do i = 1, N
- sum = sum + B(i) * C(i)
- end do
- !$omp end teams
- !$omp end target
-end function
-
-subroutine init (B, C, N)
- real :: B(N), C(N)
- integer :: N, i
- do i = 1, N
- B(i) = 0.0001 * i
- C(i) = 0.000001 * i * i
- end do
-end subroutine
-
-subroutine check (a, b)
- real :: a, b, err
- real, parameter :: EPS = 0.0001
- if (b == 0.0) then
- err = a
- else if (a == 0.0) then
- err = b
- else
- err = (a - b) / b
- end if
- if (err > EPS .or. err < -EPS) call abort
-end subroutine
-
-program e_54_4
- integer :: n
- real :: ref, d
- real, pointer, dimension(:) :: B, C
- n = 1024 * 1024
- allocate (B(n), C(n))
- call init (B, C, n)
- ref = dotprod_ref (B, C, n)
- d = dotprod (B, C, n)
- call check (ref, d)
- deallocate (B, C)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_54_5_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real, pointer, dimension(:) :: v1, v2
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real, pointer, dimension(:) :: p
- real :: diff
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (p, v1, v2, N)
- real :: p(N), v1(N), v2(N)
- integer :: i, N
- !$omp target teams map(to: v1, v2) map(from: p)
- !$omp distribute simd
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target teams
- end subroutine
-end module
-
-program e_54_5
- use e_54_5_mod, only : init, check, vec_mult
- real, pointer, dimension(:) :: p, v1, v2
- integer :: n
- n = 1000
- allocate (p(n), v1(n), v2(n))
- call init (v1, v2, n)
- call vec_mult (p, v1, v2, n)
- call check (p, N)
- deallocate (p, v1, v2)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_54_6_mod
-contains
- subroutine init (v1, v2, N)
- integer :: i, N
- real, pointer, dimension(:) :: v1, v2
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
- end subroutine
-
- subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real, pointer, dimension(:) :: p
- real :: diff
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
- end subroutine
-
- subroutine vec_mult (p, v1, v2, N)
- real :: p(N), v1(N), v2(N)
- integer :: i, N
- !$omp target teams map(to: v1, v2) map(from: p)
- !$omp distribute parallel do simd
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- !$omp end target teams
- end subroutine
-end module
-
-program e_54_6
- use e_54_6_mod, only : init, check, vec_mult
- real, pointer, dimension(:) :: p, v1, v2
- integer :: n
- n = 1000
- allocate (p(n), v1(n), v2(n))
- call init (v1, v2, n)
- call vec_mult (p, v1, v2, n)
- call check (p, N)
- deallocate (p, v1, v2)
-end program
+++ /dev/null
-! { dg-do run }
-
-module e_55_1_mod
- integer, parameter :: N = 100000, CHUNKSZ = 10000
- real :: Y(N), Z(N)
-end module
-
-subroutine init ()
- use e_55_1_mod, only : Y, Z, N
- integer :: i
- do i = 1, N
- Y(i) = 0.1 * i
- Z(i) = Y(i)
- end do
-end subroutine
-
-subroutine check ()
- use e_55_1_mod, only : Y, Z, N
- real :: err
- real, parameter :: EPS = 0.00001
- integer :: i
- do i = 1, N
- if (Y(i) == 0.0) then
- err = Z(i)
- else if (Z(i) == 0.0) then
- err = Y(i)
- else
- err = (Y(i) - Z(i)) / Z(i)
- end if
- if (err > EPS .or. err < -EPS) call abort
- end do
-end subroutine
-
-real function F (z)
- !$omp declare target
- real, intent(in) :: z
- F = -z
-end function
-
-subroutine pipedF ()
- use e_55_1_mod, only: Z, N, CHUNKSZ
- integer :: C, i
- real :: F
- do C = 1, N, CHUNKSZ
- !$omp task
- !$omp target map(Z(C:C+CHUNKSZ-1))
- !$omp parallel do
- do i = C, C+CHUNKSZ-1
- Z(i) = F (Z(i))
- end do
- !$omp end target
- !$omp end task
- end do
-end subroutine
-
-subroutine pipedF_ref ()
- use e_55_1_mod, only: Y, N
- integer :: i
- real :: F
- do i = 1, N
- Y(i) = F (Y(i))
- end do
-end subroutine
-
-program e_55_1
- call init ()
- call pipedF ()
- call pipedF_ref ()
- call check ()
-end program
+++ /dev/null
-! { dg-do run }
-! { dg-require-effective-target offload_device }
-
-subroutine init (v1, v2, N)
- !$omp declare target
- integer :: i, N
- real :: v1(N), v2(N)
- do i = 1, N
- v1(i) = i + 2.0
- v2(i) = i - 3.0
- end do
-end subroutine
-
-subroutine check (p, N)
- integer :: i, N
- real, parameter :: EPS = 0.00001
- real :: diff, p(N)
- do i = 1, N
- diff = p(i) - (i + 2.0) * (i - 3.0)
- if (diff > EPS .or. -diff > EPS) call abort
- end do
-end subroutine
-
-subroutine vec_mult (p, N)
- use omp_lib, only: omp_is_initial_device
- real :: p(N)
- real, allocatable :: v1(:), v2(:)
- integer :: i
- !$omp declare target (init)
- !$omp target data map(to: v1, v2, N) map(from: p)
- !$omp task shared(v1, v2, p) depend(out: v1, v2)
- !$omp target map(to: v1, v2, N)
- if (omp_is_initial_device ()) call abort
- allocate (v1(N), v2(N))
- call init (v1, v2, N)
- !$omp end target
- !$omp end task
- !$omp task shared(v1, v2, p) depend(in: v1, v2)
- !$omp target map(to: v1, v2, N) map(from: p)
- if (omp_is_initial_device ()) call abort
- !$omp parallel do
- do i = 1, N
- p(i) = v1(i) * v2(i)
- end do
- deallocate (v1, v2)
- !$omp end target
- !$omp end task
- !$omp end target data
- call check (p, N)
-end subroutine
-
-program e_55_2
- integer, parameter :: N = 1000
- real :: p(N)
- call vec_mult (p, N)
-end program
+++ /dev/null
-! { dg-do run }
-
- call foo ()
-contains
- subroutine foo ()
- integer, target :: A(30)
- integer, pointer :: p(:)
- !$omp target data map(A(1:4))
- p => A
- !$omp target map(p(8:27)) map(A(1:4))
- A(3) = 777
- p(9) = 777
- !$omp end target
- !$omp end target data
- if (A(3) /= 777 .or. A(9) /= 777) call abort
- end subroutine
-end
+++ /dev/null
-! { dg-do run }
-
- call foo ()
-contains
- subroutine foo ()
- integer, target :: A(30)
- integer, pointer :: p(:)
- !$omp target data map(A(1:10))
- p => A
- !$omp target map(p(4:10)) map(A(1:10))
- A(3) = 777
- p(9) = 777
- A(9) = 999
- !$omp end target
- !$omp end target data
- if (A(3) /= 777 .or. A(9) /= 999) call abort
- end subroutine
-end
+++ /dev/null
-! { dg-do run }
-! { dg-require-effective-target offload_device }
-
-program e_57_1
- use omp_lib, only: omp_is_initial_device
- integer :: a, b
- logical :: c, d
-
- a = 100
- b = 0
-
- !$omp target if(a > 200 .and. a < 400)
- c = omp_is_initial_device ()
- !$omp end target
-
- !$omp target data map(to: b) if(a > 200 .and. a < 400)
- !$omp target
- b = 100
- d = omp_is_initial_device ()
- !$omp end target
- !$omp end target data
-
- if (b /= 100 .or. .not. c .or. d) call abort
-
- a = a + 200
- b = 0
-
- !$omp target if(a > 200 .and. a < 400)
- c = omp_is_initial_device ()
- !$omp end target
-
- !$omp target data map(to: b) if(a > 200 .and. a < 400)
- !$omp target
- b = 100
- d = omp_is_initial_device ()
- !$omp end target
- !$omp end target data
-
- if (b /= 0 .or. c .or. d) call abort
-
- a = a + 200
- b = 0
-
- !$omp target if(a > 200 .and. a < 400)
- c = omp_is_initial_device ()
- !$omp end target
-
- !$omp target data map(to: b) if(a > 200 .and. a < 400)
- !$omp target
- b = 100
- d = omp_is_initial_device ()
- !$omp end target
- !$omp end target data
-
- if (b /= 100 .or. .not. c .or. d) call abort
-end program
+++ /dev/null
-! { dg-do run }
-! { dg-require-effective-target offload_device }
-
-program e_57_2
- use omp_lib, only: omp_is_initial_device, omp_get_num_devices
- integer, parameter :: N = 10
- integer :: i, num
- logical :: offload(N)
- num = omp_get_num_devices ()
- !$omp parallel do
- do i = 1, N
- !$omp target device(i-1) map(from: offload(i:i))
- offload(i) = omp_is_initial_device ()
- !$omp end target
- end do
-
- do i = 1, num
- if (offload(i)) call abort
- end do
-
- do i = num+1, N
- if (.not. offload(i)) call abort
- end do
-end program
+++ /dev/null
-! { dg-do run }
-! { dg-require-effective-target offload_device }
-
-program e_57_3
- use omp_lib, only: omp_is_initial_device, omp_get_num_devices,&
- omp_get_default_device, omp_set_default_device
- logical :: res
- integer :: default_device
-
- default_device = omp_get_default_device ()
- !$omp target
- res = omp_is_initial_device ()
- !$omp end target
- if (res) call abort
-
- call omp_set_default_device (omp_get_num_devices ())
- !$omp target
- res = omp_is_initial_device ()
- !$omp end target
- if (.not. res) call abort
-end program
--- /dev/null
+! { dg-do run }
+! { dg-additional-options "-msse2" { target sse2_runtime } }
+! { dg-additional-options "-mavx" { target avx_runtime } }
+
+module SIMD1_mod
+contains
+ ! Fill the work arrays used by star:
+ !   a(i)     = i*i*s for i = 1..n; s is set to -1 and never changed in
+ !              this routine, so every a(i) is -(i*i)
+ !   a_ref(i) = copy of a(i)
+ !   b(i)     = i + i
+ !   c(i)     = 3*i for i = 1..n+ioff_ptr
+ ! NOTE(review): the init routines of the other simd tests flip the sign
+ ! with "s = -s" inside the loop; this one does not - confirm intended.
+ subroutine init (a, a_ref, b, c, n, ioff_ptr)
+ double precision :: a(*), a_ref(*), b(*), c(*)
+ integer :: n, i, s
+ integer, pointer :: ioff_ptr
+
+ s = -1
+ do i = 1, n
+ a(i) = i * i * s
+ a_ref(i) = a(i)
+ b(i) = i + i
+ end do
+
+ ! c is filled ioff_ptr elements beyond n.
+ do i = 1, n+ioff_ptr
+ c(i) = i * 3
+ end do
+
+ end subroutine
+
+ subroutine check (a, b, n)
+ integer :: i, n
+ double precision, parameter :: EPS = 0.0000000000001
+ double precision :: diff, a(*), b(*)
+ do i = 1, n
+ diff = a(i) - b(i)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine star(a, a_ref, b, c, n, ioff_ptr)
+ double precision :: a(*), a_ref(*), b(*), c(*)
+ integer :: n, i
+ integer, pointer :: ioff_ptr
+
+ call init (a, a_ref, b, c, n, ioff_ptr)
+
+ !$omp simd
+ do i = 1,n
+ a(i) = a(i) * b(i) * c(i+ioff_ptr)
+ end do
+
+ do i = 1,n
+ a_ref(i) = a_ref(i) * b(i) * c(i+ioff_ptr)
+ end do
+
+ call check (a, a_ref, n)
+
+ end subroutine
+end module
+
+program SIMD1
+ use SIMD1_mod, only : star
+ double precision :: a(128), a_ref(128), b(128), c(144)
+ integer, pointer:: ioff_ptr
+ integer, target:: offset
+
+ offset = 16
+ ioff_ptr => offset
+
+ call star (a, a_ref, b, c, 128, ioff_ptr)
+
+end program
--- /dev/null
+! { dg-do run { target vect_simd_clones } }
+! { dg-additional-options "-msse2" { target sse2_runtime } }
+! { dg-additional-options "-mavx" { target avx_runtime } }
+
+module SIMD2_mod
+contains
+ function add1(a,b,fact) result(c)
+ !$omp declare simd(add1) uniform(fact)
+ double precision :: a,b,fact, c
+ c = a + b + fact
+ end function
+
+ function add2(a,b,i, fact) result(c)
+ !$omp declare simd(add2) uniform(a,b,fact) linear(i:1)
+ integer, value :: i
+ double precision, dimension(:) :: a, b
+ double precision :: fact, c
+ c = a(i) + b(i) + fact
+ end function
+
+  ! Rewrite a(1:n) in place inside an "omp simd" loop, using the
+  ! SIMD-enabled helpers add1 and add2 of this module.  For each i:
+  !   tmp  = add1(a(i), b(i), 1)
+  !   a(i) = add2(a, b, i, 1) + tmp
+  !   a(i) = a(i) + b(i) + 1
+  ! tmp is listed as private on the simd directive.
+  subroutine work(a, b, n )
+    implicit none
+    ! n is typed before a and b because it appears in their bounds:
+    ! with implicit typing switched off, a specification expression may
+    ! only refer to variables that have already been declared.
+    integer :: n, i
+    double precision :: a(n),b(n), tmp
+
+    !$omp simd private(tmp)
+    do i = 1,n
+      tmp  = add1(a(i), b(i), 1.0d0)
+      a(i) = add2(a, b, i, 1.0d0) + tmp
+      a(i) = a(i) + b(i) + 1.0d0
+    end do
+  end subroutine
+
+  ! Reference version of work: the same three statements per element,
+  ! executed in a plain do loop without any OpenMP directive.
+  subroutine work_ref(a, b, n )
+    implicit none
+    ! n is typed before a and b because it appears in their bounds:
+    ! with implicit typing switched off, a specification expression may
+    ! only refer to variables that have already been declared.
+    integer :: n, i
+    double precision :: a(n),b(n), tmp
+
+    do i = 1,n
+      tmp  = add1(a(i), b(i), 1.0d0)
+      a(i) = add2(a, b, i, 1.0d0) + tmp
+      a(i) = a(i) + b(i) + 1.0d0
+    end do
+  end subroutine
+
+ subroutine check (a, b, n)
+ integer :: i, n
+ double precision, parameter :: EPS = 0.0000000000001
+ double precision :: diff, a(*), b(*)
+ do i = 1, n
+ diff = a(i) - b(i)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+end module
+
+program main
+ use SIMD2_mod
+ integer, parameter :: N=32
+ integer :: i
+ double precision :: a(N), b(N), a_ref(N)
+ do i = 1,N
+ a(i) = i-1
+ a_ref(i) = a(i)
+ b(i) = N-(i-1)
+ end do
+
+ call work(a, b, N )
+ call work_ref(a_ref, b, N )
+
+ call check(a, a_ref, N )
+end program
--- /dev/null
+! { dg-do run }
+! { dg-additional-options "-msse2" { target sse2_runtime } }
+! { dg-additional-options "-mavx" { target avx_runtime } }
+
+module SIMD3_mod
+contains
+ subroutine work( a, b, n, sum )
+ implicit none
+ integer :: i, n
+ double precision :: a(n), b(n), sum, tmp
+
+ sum = 0.0d0
+ call init(a, b, n)
+ !$omp simd private(tmp) reduction(+:sum)
+ do i = 1,n
+ tmp = a(i) + b(i)
+ sum = sum + tmp
+ end do
+
+ end subroutine work
+
+ subroutine work_ref( a, b, n, sum )
+ implicit none
+ integer :: i, n
+ double precision :: a(n), b(n), sum, tmp
+
+ sum = 0.0d0
+ call init(a, b, n)
+ do i = 1,n
+ tmp = a(i) + b(i)
+ sum = sum + tmp
+ end do
+
+ end subroutine work_ref
+
+ subroutine init (a, b, n)
+ double precision :: a(*), b(*)
+ integer :: n, i, s
+
+ s = -1
+ do i = 1, n
+ a(i) = i * i * s
+ b(i) = i + i
+ s = -s
+ end do
+
+ end subroutine
+end module
+
+program SIMD3
+ use SIMD3_mod
+ double precision :: a(128), b(128), sum, sum_ref
+
+ call work(a, b, 128, sum)
+ call work_ref(a, b, 128, sum_ref)
+
+ if (sum .ne. sum_ref) call abort
+
+end program
--- /dev/null
+! { dg-do run }
+! { dg-additional-options "-msse2" { target sse2_runtime } }
+! { dg-additional-options "-mavx" { target avx_runtime } }
+
+module SIMD4_mod
+contains
+  ! Initialise b(1:n) with init, then apply b(i) = b(i-m) - 1.0 for
+  ! i = m+1 .. n inside an "omp simd safelen(16)" loop.  Each iteration
+  ! reads the element m positions behind the one it writes.
+  ! NOTE(review): safelen(16) presumably requires m >= 16; the caller in
+  ! this file passes m = 32.
+  subroutine work( b, n, m )
+    implicit none
+    ! n is typed before b because it appears in the bound of b: with
+    ! implicit typing switched off, a specification expression may only
+    ! refer to variables that have already been declared.
+    integer :: i,n,m
+    real :: b(n)
+
+    call init(b, n)
+
+    !$omp simd safelen(16)
+    do i = m+1, n
+      b(i) = b(i-m) - 1.0
+    end do
+  end subroutine work
+
+  ! Reference version of work: initialise b(1:n) with init, then apply
+  ! b(i) = b(i-m) - 1.0 for i = m+1 .. n in a plain do loop.
+  subroutine work_ref( b, n, m )
+    implicit none
+    ! n is typed before b because it appears in the bound of b: with
+    ! implicit typing switched off, a specification expression may only
+    ! refer to variables that have already been declared.
+    integer :: i,n,m
+    real :: b(n)
+
+    call init(b, n)
+
+    do i = m+1, n
+      b(i) = b(i-m) - 1.0
+    end do
+  end subroutine work_ref
+
+ subroutine init (b, n)
+ real :: b(*)
+ integer :: n, i, s
+
+ s = -1
+ do i = 1, n
+ b(i) = i * i * s
+ s = -s
+ end do
+
+ end subroutine
+
+ subroutine check (a, b, n)
+ integer :: i, n
+ real, parameter :: EPS = 0.000001
+ real :: diff, a(*), b(*)
+ do i = 1, n
+ diff = a(i) - b(i)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+end module
+
+program SIMD4
+ use SIMD4_mod
+ real :: b(128), b_ref(128)
+
+ call work(b, 128, 32)
+ call work_ref(b_ref, 128, 32)
+
+ call check(b, b_ref, 128)
+end program
--- /dev/null
+! { dg-do run }
+! { dg-additional-options "-msse2" { target sse2_runtime } }
+! { dg-additional-options "-mavx" { target avx_runtime } }
+
+module SIMD5_mod
+contains
+ subroutine work( a, b, c, n )
+ implicit none
+ integer :: i,j,n
+ double precision :: a(n,n), b(n,n), c(n,n), tmp
+
+ !$omp do simd collapse(2) private(tmp)
+ do j = 1,n
+ do i = 1,n
+ tmp = a(i,j) + b(i,j)
+ c(i,j) = tmp
+ end do
+ end do
+
+ end subroutine work
+
+ subroutine work_ref( a, b, c, n )
+ implicit none
+ integer :: i,j,n
+ double precision :: a(n,n), b(n,n), c(n,n), tmp
+
+ do j = 1,n
+ do i = 1,n
+ tmp = a(i,j) + b(i,j)
+ c(i,j) = tmp
+ end do
+ end do
+
+ end subroutine work_ref
+
+ subroutine init (a, b, n)
+ integer :: i,j,n,s
+ double precision :: a(n,n), b(n,n)
+
+ s = -1
+
+ do j = 1,n
+ do i = 1,n
+ a(i,j) = i*j*s
+ b(i,j) = i+j
+ s = -s
+ end do
+ end do
+
+ end subroutine
+
+ subroutine check (a, b, n)
+ integer :: i, j, n
+ double precision, parameter :: EPS = 0.0000000000000001
+ double precision :: diff, a(n,n), b(n,n)
+ do j = 1, n
+ do i = 1, n
+ diff = a(i,j) - b(i,j)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end do
+ end subroutine
+
+end module
+
+program SIMD5
+ use SIMD5_mod
+ double precision, dimension(32, 32) :: a, b, c, c_ref
+
+ call init(a, b, 32)
+
+ call work(a, b, c, 32)
+ call work_ref(a, b, c_ref, 32)
+
+ call check(c, c_ref, 32)
+end program
--- /dev/null
+! { dg-do run { target vect_simd_clones } }
+! { dg-additional-options "-msse2" { target sse2_runtime } }
+! { dg-additional-options "-mavx" { target avx_runtime } }
+
+module SIMD6_mod
+contains
+ ! SIMD-enabled integer helper ("declare simd ... notinbranch").
+ ! Adds 10 to its argument IN PLACE and returns the updated value, so
+ ! the actual argument of the caller (b(i) in myaddint and
+ ! myaddint_ref) is modified as a side effect.
+ function foo(p) result(r)
+ !$omp declare simd(foo) notinbranch
+ integer :: p, r
+ p = p + 10
+ r = p
+ end function foo
+
+ function myaddint(a, b, n) result(r)
+ implicit none
+ integer :: a(*), b(*), n, r
+ integer :: i
+
+ !$omp simd
+ do i=1, n
+ a(i) = foo(b(i)) ! foo is not called under a condition
+ end do
+ r = a(n)
+
+ end function myaddint
+
+ function myaddint_ref(a, b, n) result(r)
+ implicit none
+ integer :: a(*), b(*), n, r
+ integer :: i
+
+ do i=1, n
+ a(i) = foo(b(i))
+ end do
+ r = a(n)
+
+ end function myaddint_ref
+
+ ! SIMD-enabled real helper ("declare simd ... inbranch").
+ ! Adds 18.5 to its argument IN PLACE and returns the updated value, so
+ ! the actual argument of the caller (y(i) in myaddfloat and
+ ! myaddfloat_ref) is modified as a side effect.
+ function goo(p) result(r)
+ !$omp declare simd(goo) inbranch
+ real :: p, r
+ p = p + 18.5
+ r = p
+ end function goo
+
+ function myaddfloat(x, y, n) result(r)
+ implicit none
+ real :: x(*), y(*), r
+ integer :: n
+ integer :: i
+
+ !$omp simd
+ do i=1, n
+ if (x(i) > y(i)) then
+ x(i) = goo(y(i))
+ ! goo is called under the condition (or within a branch)
+ else
+ x(i) = y(i)
+ endif
+ end do
+
+ r = x(n)
+ end function myaddfloat
+
+ function myaddfloat_ref(x, y, n) result(r)
+ implicit none
+ real :: x(*), y(*), r
+ integer :: n
+ integer :: i
+
+ do i=1, n
+ if (x(i) > y(i)) then
+ x(i) = goo(y(i))
+ else
+ x(i) = y(i)
+ endif
+ end do
+
+ r = x(n)
+ end function myaddfloat_ref
+
+  ! Fill b(1:n) and y(1:n) with i*i carrying an alternating sign that
+  ! starts negative: -1, 4, -9, ...  Both dummies are declared with 128
+  ! elements, so n must not exceed 128.
+  subroutine init (b, y, n)
+    implicit none
+    integer :: b(128)
+    real :: y(128)
+    ! i, n and s were implicitly typed before (s as default real).  They
+    ! are declared explicitly now, like in the init routines of the other
+    ! simd tests; all values involved are small whole numbers, so the
+    ! stored results are unchanged.
+    integer :: i, n, s
+
+    s = -1
+    do i = 1, n
+      b(i) = i*i*s
+      y(i) = i*i*s
+      s = -s
+    end do
+
+  end subroutine
+
+  ! Fill b(1:n) and y(1:n) with the index value i.  Both dummies are
+  ! declared with 128 elements, so n must not exceed 128.
+  subroutine init2 (b, y, n)
+    implicit none
+    integer :: b(128)
+    real :: y(128)
+    ! i and n were implicitly typed before; declared explicitly now.
+    integer :: i, n
+
+    do i = 1, n
+      b(i) = i
+      y(i) = i
+    end do
+
+  end subroutine
+
+ subroutine checkfloat (a, b, n)
+ integer :: i, n
+ real, parameter :: EPS = 0.000001
+ real :: diff, a(*), b(*)
+ do i = 1, n
+ diff = a(i) - b(i)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine checkint (a, b, n)
+ integer :: i, n, a(*), b(*)
+ do i = 1, n
+ if (a(i) .ne. b(i)) call abort
+ end do
+ end subroutine
+
+ subroutine test ()
+ integer :: a(128), a_ref(128), b(128), ri, ri_ref
+ real :: x(128), x_ref(128), y(128), rf, rf_ref
+
+ call init2(a, x, 128)
+ call init2(a_ref, x_ref, 128)
+
+ call init(b, y, 128)
+
+ ri = myaddint (a, b, 128)
+ rf = myaddfloat (x, y, 128)
+
+ call init(b, y, 128)
+
+ ri_ref = myaddint_ref (a_ref, b, 128)
+ rf_ref = myaddfloat_ref (x_ref, y, 128)
+
+ call checkint (a, a_ref, 128)
+ call checkfloat (x, x_ref, 128)
+ end subroutine
+
+end module
+
+program SIMD6
+ use SIMD6_mod, only: test
+
+ call test ()
+
+end program
--- /dev/null
+! { dg-do run { target vect_simd_clones } }
+! { dg-additional-options "-msse2" { target sse2_runtime } }
+! { dg-additional-options "-mavx" { target avx_runtime } }
+
+program fibonacci
+ implicit none
+ integer,parameter :: N=45
+ integer :: a(0:N-1), b(0:N-1)
+ integer :: a_ref(0:N-1), b_ref(0:N-1)
+ integer :: i
+ integer, external :: fib
+
+ !$omp simd
+ do i = 0,N-1
+ b(i) = i
+ end do
+
+ do i = 0,N-1
+ b_ref(i) = i
+ end do
+
+ !$omp simd
+ do i=0,N-1
+ a(i) = fib(b(i))
+ end do
+
+ do i=0,N-1
+ a_ref(i) = fib(b_ref(i))
+ end do
+
+ do i = 0, N-1
+ if (a(i) .ne. a_ref(i)) call abort ()
+ end do
+
+ if (a(44) .ne. 1134903170) call abort()
+
+end program
+
+! Recursive function carrying a "declare simd ... inbranch" directive.
+! Returns n itself for n <= 2 and fib(n-1) + fib(n-2) otherwise, i.e.
+! 0, 1, 2, 3, 5, 8, ... for n = 0, 1, 2, 3, 4, 5, ...
+recursive function fib(n) result(r)
+!$omp declare simd(fib) inbranch
+ integer :: n, r
+
+ if (n <= 2) then
+ r = n
+ else
+ r = fib(n-1) + fib(n-2)
+ endif
+
+end function fib
--- /dev/null
+! { dg-do run }
+! { dg-additional-options "-msse2" { target sse2_runtime } }
+! { dg-additional-options "-mavx" { target avx_runtime } }
+
+module work
+
+integer :: P(1000)
+real :: A(1000)
+
+contains
+! Runs an "omp simd" loop over i = 1..999 that reads the module arrays
+! P and A and updates A.  In each iteration:
+!   j   = P(i)
+!   pri = 0.5, or A(j+1) + arr(i) when j-1 is even
+!   A(j) = pri * 1.5, and that value is then added to pri
+! pri is lastprivate on the simd directive and is the function result.
+function do_work(arr) result(pri)
+ implicit none
+ real, dimension(*) :: arr
+
+ real :: pri
+ integer :: i, j
+
+ !$omp simd private(j) lastprivate(pri)
+ do i = 1, 999
+ j = P(i)
+
+ pri = 0.5
+ ! A(j+1) is read here while A(j) is written below.
+ if (mod(j-1, 2) == 0) then
+ pri = A(j+1) + arr(i)
+ endif
+ A(j) = pri * 1.5
+ pri = pri + A(j)
+ end do
+
+end function do_work
+
+end module work
+
+program simd_8f
+ use work
+ implicit none
+ real :: pri, arr(1000)
+ integer :: i
+
+ do i = 1, 1000
+ P(i) = i
+ A(i) = (i-1) * 1.5
+ arr(i) = (i-1) * 1.8
+ end do
+ pri = do_work(arr)
+ if (pri .ne. 8237.25) call abort ()
+
+end program
--- /dev/null
+! { dg-do run }
+
+module e_50_1_mod
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine vec_mult (N)
+ integer :: i, N
+ real :: p(N), v1(N), v2(N)
+ call init (v1, v2, N)
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call check (p, N)
+ end subroutine
+
+end module
+
+program e_50_1
+ use e_50_1_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+module e_50_2_mod
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine vec_mult (N)
+ integer :: i, N
+ real :: p(N), v1(N), v2(N)
+ call init (v1, v2, N)
+ !$omp target map(v1,v2,p)
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call check (p, N)
+ end subroutine
+end module
+
+program e_50_2
+ use e_50_2_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+module e_50_3_mod
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine vec_mult (N)
+ integer :: i, N
+ real :: p(N), v1(N), v2(N)
+ call init (v1, v2, N)
+ !$omp target map(to: v1,v2) map(from: p)
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call check (p, N)
+ end subroutine
+end module
+
+program e_50_3
+ use e_50_3_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+module e_50_4_mod
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real, pointer, dimension(:) :: v1, v2
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+  ! Abort unless p(i) equals (i + 2.0) * (i - 3.0) within EPS for every
+  ! i = 1..N.
+  subroutine check (p, N)
+    integer :: i, N
+    real, parameter :: EPS = 0.00001
+    real, pointer, dimension(:) :: p
+    ! diff used to rely on implicit typing; it is declared explicitly
+    ! here, as in the check routines of the sibling tests.
+    real :: diff
+    do i = 1, N
+      diff = p(i) - (i + 2.0) * (i - 3.0)
+      if (diff > EPS .or. -diff > EPS) call abort
+    end do
+  end subroutine
+
+ subroutine vec_mult_1 (p, v1, v2, N)
+ integer :: i, N
+ real, pointer, dimension(:) :: p, v1, v2
+ !$omp target map(to: v1(1:N), v2(:N)) map(from: p(1:N))
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ end subroutine
+
+ subroutine vec_mult_2 (p, v1, v2, N)
+ real, dimension(*) :: p, v1, v2
+ integer :: i, N
+ !$omp target map(to: v1(1:N), v2(:N)) map(from: p(1:N))
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ end subroutine
+end module
+
+program e_50_4
+ use e_50_4_mod, only : init, check, vec_mult_1, vec_mult_2
+ real, pointer, dimension(:) :: p1, p2, v1, v2
+ integer :: n
+ n = 1000
+ allocate (p1(n), p2(n), v1(n), v2(n))
+ call init (v1, v2, n)
+ call vec_mult_1 (p1, v1, v2, n)
+ call vec_mult_2 (p2, v1, v2, n)
+ call check (p1, N)
+ call check (p2, N)
+ deallocate (p1, p2, v1, v2)
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+module e_50_5_mod
+integer, parameter :: THRESHOLD1 = 500, THRESHOLD2 = 100
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine vec_mult (N)
+ use omp_lib, only: omp_is_initial_device
+ integer :: i, N
+ real :: p(N), v1(N), v2(N)
+ call init (v1, v2, N)
+ !$omp target if(N > THRESHOLD1) map(to: v1,v2) map(from: p)
+ if (omp_is_initial_device ()) call abort
+ !$omp parallel do if(N > THRESHOLD2)
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call check (p, N)
+ end subroutine
+end module
+
+program e_50_5
+ use e_50_5_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+module e_51_1_mod
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine vec_mult (N)
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ call init (v1, v2, N)
+ !$omp target data map(to: v1, v2) map(from: p)
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+program e_51_1
+ use e_51_1_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+module e_51_2_mod
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine init_again (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i - 3.0
+ v2(i) = i + 2.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - 2 * (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine vec_mult (N)
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ call init (v1, v2, N)
+ !$omp target data map(from: p)
+ !$omp target map(to: v1, v2 )
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call init_again (v1, v2, N)
+ !$omp target map(to: v1, v2 )
+ !$omp parallel do
+ do i = 1, N
+ p(i) = p(i) + v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+program e_51_2
+ use e_51_2_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+module e_51_3_mod
+contains
+ subroutine init (Q, rows, cols)
+ integer :: i, k, rows, cols
+ double precision :: Q(rows,cols)
+ do k = 1, cols
+ do i = 1, rows
+ Q(i,k) = 10 * i + k
+ end do
+ end do
+ end subroutine
+
+ subroutine check (P, Q, rows, cols)
+ integer :: i, k, rows, cols
+ double precision, parameter :: EPS = 0.00001
+ double precision :: P(rows,cols), Q(rows,cols), diff
+ do k = 1, cols
+ do i = 1, rows
+ diff = P(i,k) - Q(i,k)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end do
+ end subroutine
+
+ subroutine gramSchmidt_ref (Q, rows, cols)
+ integer :: i, k, rows, cols
+ double precision :: Q(rows,cols), tmp
+ do k = 1, cols
+ tmp = 0.0d0
+ do i = 1, rows
+ tmp = tmp + (Q(i,k) * Q(i,k))
+ end do
+ tmp = 1.0d0 / sqrt (tmp)
+ do i = 1, rows
+ Q(i,k) = Q(i,k) * tmp
+ end do
+ end do
+ end subroutine
+
+ subroutine gramSchmidt (Q, rows, cols)
+ integer :: i, k, rows, cols
+ double precision :: Q(rows,cols), tmp
+ !$omp target data map(Q)
+ do k = 1, cols
+ tmp = 0.0d0
+ !$omp target
+ !$omp parallel do reduction(+:tmp)
+ do i = 1, rows
+ tmp = tmp + (Q(i,k) * Q(i,k))
+ end do
+ !$omp end target
+ tmp = 1.0d0 / sqrt (tmp)
+ !$omp target
+ !$omp parallel do
+ do i = 1, rows
+ Q(i,k) = Q(i,k) * tmp
+ end do
+ !$omp end target
+ end do
+ !$omp end target data
+ end subroutine
+end module
+
+program e_51_3
+ use e_51_3_mod, only : init, check, gramSchmidt, gramSchmidt_ref
+ integer :: cols, rows
+ double precision, pointer :: P(:,:), Q(:,:)
+ cols = 5
+ rows = 5
+ allocate (P(rows,cols), Q(rows,cols))
+ call init (P, rows, cols)
+ call init (Q, rows, cols)
+ call gramSchmidt_ref (P, rows, cols)
+ call gramSchmidt (Q, rows, cols)
+ call check (P, Q, rows, cols)
+ deallocate (P, Q)
+end program
--- /dev/null
+! { dg-do run }
+
+module e_51_4_mod
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine foo (p, v1, v2, N)
+ real, pointer, dimension(:) :: p, v1, v2
+ integer :: N
+ call init (v1, v2, N)
+ !$omp target data map(to: v1, v2) map(from: p)
+ call vec_mult (p, v1, v2, N)
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+
+ subroutine vec_mult (p, v1, v2, N)
+ real, pointer, dimension(:) :: p, v1, v2
+ integer :: i, N
+ !$omp target map(to: v1, v2) map(from: p)
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ end subroutine
+end module
+
+program e_51_4
+ use e_51_4_mod, only : foo
+ integer :: n
+ real, pointer, dimension(:) :: p, v1, v2
+ n = 1000
+ allocate (p(n), v1(n), v2(n))
+ call foo (p, v1, v2, n)
+ deallocate (p, v1, v2)
+end program
--- /dev/null
+! { dg-do run }
+
+module e_51_5_mod
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine foo (p, v1, v2, N)
+ real, dimension(:) :: p, v1, v2
+ integer :: N
+ call init (v1, v2, N)
+ !$omp target data map(to: v1, v2, N) map(from: p)
+ call vec_mult (p, v1, v2, N)
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+
+ subroutine vec_mult (p, v1, v2, N)
+ real, dimension(:) :: p, v1, v2
+ integer :: i, N
+ !$omp target map(to: v1, v2, N) map(from: p)
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ end subroutine
+end module
+
+program e_51_5
+ use e_51_5_mod, only : foo
+ integer, parameter :: N = 1024
+ real, allocatable, dimension(:) :: p, v1, v2
+ allocate(p(N), v1(N), v2(N))
+ call foo (p, v1, v2, N)
+ deallocate (p, v1, v2)
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+module e_51_6_mod
+integer, parameter :: THRESHOLD = 500
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine init_again (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i - 3.0
+ v2(i) = i + 2.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - 2 * (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine vec_mult (N)
+ use omp_lib, only: omp_is_initial_device
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ call init (v1, v2, N)
+ !$omp target data if(N > THRESHOLD) map(from: p)
+ !$omp target if(N > THRESHOLD) map(to: v1, v2)
+ if (omp_is_initial_device ()) call abort
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call init_again (v1, v2, N)
+ !$omp target if(N > THRESHOLD) map(to: v1, v2)
+ if (omp_is_initial_device ()) call abort
+ !$omp parallel do
+ do i = 1, N
+ p(i) = p(i) + v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+program e_51_6
+ use e_51_6_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+module e_51_7_mod
+integer, parameter :: THRESHOLD = 500
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine vec_mult (N)
+ use omp_lib, only: omp_is_initial_device
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ call init (v1, v2, N)
+ !$omp target data if(N > THRESHOLD) map(to: v1, v2) map(from: p)
+ !$omp target
+ if (omp_is_initial_device ()) call abort
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+program e_51_7
+ use e_51_7_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+module e_52_1_mod
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine init_again (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i - 3.0
+ v2(i) = i + 2.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - 2 * (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ subroutine vec_mult (p, v1, v2, N)
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ call init (v1, v2, N)
+ !$omp target data map(to: v1, v2) map(from: p)
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call init_again (v1, v2, N)
+ !$omp target update to(v1, v2)
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = p(i) + v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+program e_52_1
+ use e_52_1_mod, only : vec_mult
+ integer :: n
+ real, pointer :: p(:), v1(:), v2(:)
+ n = 1000
+ allocate (p(n), v1(n), v2(n))
+ call vec_mult (p, v1, v2, n)
+ deallocate (p, v1, v2)
+end program
--- /dev/null
+! { dg-do run }
+
+module e_52_2_mod
+contains
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ subroutine init_again (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i - 3.0
+ v2(i) = i + 2.0
+ end do
+ end subroutine
+
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i * i + (i + 2.0) * (i - 3.0))
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ logical function maybe_init_again (v, N)
+ real :: v(N)
+ integer :: i, N
+ do i = 1, N
+ v(i) = i
+ end do
+ maybe_init_again = .true.
+ end function
+
+ subroutine vec_mult (p, v1, v2, N)
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ logical :: changed
+ call init (v1, v2, N)
+ !$omp target data map(to: v1, v2) map(from: p)
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ changed = maybe_init_again (v1, N)
+ !$omp target update if(changed) to(v1(:N))
+ changed = maybe_init_again (v2, N)
+ !$omp target update if(changed) to(v2(:N))
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = p(i) + v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+program e_52_2
+ use e_52_2_mod, only : vec_mult
+ integer :: n
+ real, pointer :: p(:), v1(:), v2(:)
+ n = 1000
+ allocate (p(n), v1(n), v2(n))
+ call vec_mult (p, v1, v2, n)
+ deallocate (p, v1, v2)
+end program
--- /dev/null
+! { dg-do run }
+
+program example
+ integer :: x
+ x = 1
+ !$omp parallel
+ !$omp single
+ !$omp task shared(x) depend(out: x)
+ x = 2
+ !$omp end task
+ !$omp task shared(x) depend(in: x)
+ if (x .ne. 2) call abort ()
+ !$omp end task
+ !$omp end single
+ !$omp end parallel
+end program
--- /dev/null
+! { dg-do run }
+
+program example
+ integer :: x
+ x = 1
+ !$omp parallel
+ !$omp single
+ !$omp task shared(x) depend(in: x)
+ if (x .ne. 1) call abort ()
+ !$omp end task
+ !$omp task shared(x) depend(out: x)
+ x = 2
+ !$omp end task
+ !$omp end single
+ !$omp end parallel
+end program
--- /dev/null
+! { dg-do run }
+
+program example
+ integer :: x
+ x = 0
+ !$omp parallel
+ !$omp single
+ !$omp task shared(x) depend(out: x)
+ x = 1
+ !$omp end task
+ !$omp task shared(x) depend(out: x)
+ x = 2
+ !$omp end task
+ !$omp taskwait
+ if ((x .ne. 1) .and. (x .ne. 2)) call abort()
+ !$omp end single
+ !$omp end parallel
+end program
--- /dev/null
+! { dg-do run }
+
+program example
+  ! One task writes x with an out dependence; two following tasks, each
+  ! with an in dependence on x, abort unless they see the written value 2.
+  implicit none
+  integer :: x
+
+  x = 1
+  !$omp parallel
+    !$omp single
+      !$omp task shared(x) depend(out: x)
+        x = 2
+      !$omp end task
+      !$omp task shared(x) depend(in: x)
+        if (x /= 2) call abort ()
+      !$omp end task
+      !$omp task shared(x) depend(in: x)
+        if (x /= 2) call abort ()
+      !$omp end task
+    !$omp end single
+  !$omp end parallel
+end program example
--- /dev/null
+! { dg-do run }
+
+module task_dep5_mod
+  ! Tiled matrix accumulation expressed as dependent OpenMP tasks, plus a
+  ! serial reference version and the helpers that set up and compare the
+  ! operands.
+  implicit none
+contains
+  subroutine matmul_depend (N, BS, A, B, C)
+    ! Adds A(k,i) * B(j,k) into C(j,i) for all i, j, k in 1..N, creating
+    ! one task per BS x BS x BS tile.
+    !   N       - extent of the square arrays
+    !   BS      - tile edge length; the tile loops are not clamped to N,
+    !             so N must be a multiple of BS
+    !   A, B    - input arrays (only read)
+    !   C       - accumulator array, updated in place
+    integer :: N, BS, BM
+    real, dimension(N, N) :: A, B, C
+    integer :: i, j, k, ii, jj, kk
+    BM = BS - 1
+    do i = 1, N, BS
+      do j = 1, N, BS
+        do k = 1, N, BS
+          ! NOTE(review): the depend sections are written as (i-tile, k-tile)
+          ! etc. while the body indexes (kk, ii) etc.  Ordering is still
+          ! enforced because every k-task of one (i, j) pair names the same
+          ! inout section of C, and distinct (i, j) pairs update distinct
+          ! tiles of C.
+          !$omp task shared(A,B,C) private(ii,jj,kk) & ! I,J,K are firstprivate by default
+          !$omp depend ( in: A(i:i+BM, k:k+BM), B(k:k+BM, j:j+BM) ) &
+          !$omp depend ( inout: C(i:i+BM, j:j+BM) )
+          do ii = i, i+BM
+            do jj = j, j+BM
+              do kk = k, k+BM
+                C(jj,ii) = C(jj,ii) + A(kk,ii) * B(jj,kk)
+              end do
+            end do
+          end do
+          !$omp end task
+        end do
+      end do
+    end do
+    ! Make sure every generated task has finished before C is handed back
+    ! to the caller, whether or not the caller is inside a parallel region.
+    !$omp taskwait
+  end subroutine matmul_depend
+
+  subroutine matmul_ref (N, A, B, C)
+    ! Serial reference: adds A(k,i) * B(j,k) into C(j,i) for all i, j, k
+    ! in 1..N.
+    integer :: N
+    real, dimension(N, N) :: A, B, C
+    integer :: i, j, k
+    do i = 1, N
+      do j = 1, N
+        do k = 1, N
+          C(j,i) = C(j,i) + A(k,i) * B(j,k)
+        end do
+      end do
+    end do
+  end subroutine matmul_ref
+
+  subroutine check (N, A, B)
+    ! Aborts if any pair of corresponding elements of A and B differs by
+    ! more than EPS in absolute value.
+    integer :: N
+    integer :: i, j
+    ! EPS must be real: as an integer constant 0.000001 truncates to 0.
+    real, parameter :: EPS = 0.000001
+    real, dimension(N,N) :: A, B
+    real :: diff
+    do i = 1, N
+      do j = 1, N
+        diff = A(i, j) - B(i, j)
+        if (abs (diff) > EPS) then
+          call abort ()
+        end if
+      end do
+    end do
+  end subroutine check
+
+  subroutine init (N, A, B)
+    ! Fills A with i*j carrying an alternating sign (starting negative and
+    ! flipping after every element) and B with i+j.
+    integer :: N
+    integer :: i, j, s
+    real, dimension(N,N) :: A, B
+    s = -1
+    do i = 1, N
+      do j = 1, N
+        A(i, j) = i*j*s
+        B(i, j) = i+j
+        s = -s
+      end do
+    end do
+  end subroutine init
+
+  subroutine zero_init (N, A, B)
+    ! Sets every element of A and B to zero.
+    integer :: N
+    integer :: i, j
+    real, dimension(N,N) :: A, B
+    do i = 1, N
+      do j = 1, N
+        A(i, j) = 0
+        B(i, j) = 0
+      end do
+    end do
+  end subroutine zero_init
+
+end module task_dep5_mod
+
+program main
+  ! Driver: runs the task-based and the reference matrix routines on the
+  ! same 32 x 32 operands (tile size 4) and compares the two results.
+  use task_dep5_mod
+  implicit none
+  integer, parameter :: n = 32
+  integer, parameter :: bs = 4
+  real, dimension(n, n) :: A, B, C, C_ref
+
+  call init (n, A, B)
+  call zero_init (n, C, C_ref)
+
+  call matmul_depend (n, bs, A, B, C)
+  call matmul_ref (n, A, B, C_ref)
+
+  call check (n, C, C_ref)
+end program main
--- /dev/null
+! { dg-do run }
+
+function dotprod_ref (B, C, N) result (total)
+  ! Serial reference: returns the sum of B(k) * C(k) for k = 1..N,
+  ! accumulated in increasing k order.
+  implicit none
+  integer :: N
+  real :: B(N), C(N)
+  real :: total
+  integer :: k
+
+  total = 0.0e0
+  do k = 1, N
+    total = total + B(k) * C(k)
+  end do
+end function dotprod_ref
+
+function dotprod (B, C, N, block_size, num_teams, block_threads) result (sum)
+  ! Returns the sum of B(i) * C(i) for i = 1..N, computed in a target
+  ! region with a league of teams.
+  !   B, C          - operand arrays of length N
+  !   N             - number of elements
+  !   block_size    - number of consecutive elements handled per
+  !                   distribute iteration
+  !   num_teams     - value passed to the num_teams clause
+  !   block_threads - value passed to the thread_limit clause
+  implicit none
+  integer :: N, block_size, num_teams, block_threads
+  real :: B(N), C(N), sum
+  integer :: i, i0
+  sum = 0.0e0
+  ! sum is mapped tofrom explicitly: from OpenMP 4.5 on, a scalar that is
+  ! referenced in a target region without a map clause is firstprivate
+  ! there, so the reduced value would never be copied back.
+  !$omp target map(to: B, C, block_size, num_teams, block_threads) &
+  !$omp& map(tofrom: sum)
+    !$omp teams num_teams(num_teams) thread_limit(block_threads) &
+    !$omp& reduction(+:sum)
+      !$omp distribute
+      do i0 = 1, N, block_size
+        ! The last block is clipped to N by the min.
+        !$omp parallel do reduction(+:sum)
+        do i = i0, min (i0 + block_size - 1, N)
+          sum = sum + B(i) * C(i)
+        end do
+      end do
+    !$omp end teams
+  !$omp end target
+end function dotprod
+
+subroutine init (B, C, N)
+  ! Fills B(k) with 0.0001 * k and C(k) with 0.000001 * k * k for
+  ! k = 1..N.
+  integer :: N
+  real :: B(N), C(N)
+  integer :: k
+
+  do k = 1, N
+    B(k) = 0.0001 * k
+    C(k) = 0.000001 * k * k
+  end do
+end subroutine init
+
+subroutine check (a, b)
+  ! Aborts when a and b disagree by more than EPS.  The error measure is
+  ! (a - b) / b, falling back to a when b is zero and to b when only a
+  ! is zero.
+  real, parameter :: EPS = 0.0001
+  real :: a, b
+  real :: err
+
+  if (b == 0.0) then
+    err = a
+  else if (a == 0.0) then
+    err = b
+  else
+    err = (a - b) / b
+  end if
+
+  if (abs (err) > EPS) call abort
+end subroutine check
+
+program e_54_1
+  ! Driver: compares the teams-based dot product against the serial
+  ! reference on 1024 * 1024 elements.
+  implicit none
+  real, external :: dotprod_ref, dotprod
+  real, pointer, dimension(:) :: B, C
+  real :: expected, got
+  integer :: n
+
+  n = 1024 * 1024
+  allocate (B(n))
+  allocate (C(n))
+  call init (B, C, n)
+
+  expected = dotprod_ref (B, C, n)
+  got = dotprod (B, C, n, n / 8, 2, 8)
+  call check (expected, got)
+
+  deallocate (B)
+  deallocate (C)
+end program e_54_1
--- /dev/null
+! { dg-do run }
+
+function dotprod_ref (B, C, N) result (total)
+  ! Serial reference: returns the sum of B(k) * C(k) for k = 1..N,
+  ! accumulated in increasing k order.
+  implicit none
+  integer :: N
+  real :: B(N), C(N)
+  real :: total
+  integer :: k
+
+  total = 0.0e0
+  do k = 1, N
+    total = total + B(k) * C(k)
+  end do
+end function dotprod_ref
+
+function dotprod (B, C, N) result(sum)
+  ! Returns the sum of B(i) * C(i) for i = 1..N, computed in a
+  ! target teams region with a distribute parallel do loop.
+  !   B, C - operand arrays of length N
+  !   N    - number of elements
+  implicit none
+  integer :: N
+  real :: B(N), C(N), sum
+  integer :: i
+  sum = 0.0e0
+  ! sum needs both clauses below:
+  !  - map(tofrom:) because from OpenMP 4.5 on an unmapped scalar is
+  !    firstprivate in a target region and would not be copied back;
+  !  - reduction on the teams construct because the reduction on the
+  !    loop only combines the threads of one team, leaving the teams to
+  !    update a shared sum concurrently.
+  !$omp target teams map(to: B, C) map(tofrom: sum) &
+  !$omp& reduction(+:sum)
+    !$omp distribute parallel do reduction(+:sum)
+    do i = 1, N
+      sum = sum + B(i) * C(i)
+    end do
+  !$omp end target teams
+end function dotprod
+
+subroutine init (B, C, N)
+  ! Fills B(k) with 0.0001 * k and C(k) with 0.000001 * k * k for
+  ! k = 1..N.
+  integer :: N
+  real :: B(N), C(N)
+  integer :: k
+
+  do k = 1, N
+    B(k) = 0.0001 * k
+    C(k) = 0.000001 * k * k
+  end do
+end subroutine init
+
+subroutine check (a, b)
+  ! Aborts when a and b disagree by more than EPS.  The error measure is
+  ! (a - b) / b, falling back to a when b is zero and to b when only a
+  ! is zero.
+  real, parameter :: EPS = 0.0001
+  real :: a, b
+  real :: err
+
+  if (b == 0.0) then
+    err = a
+  else if (a == 0.0) then
+    err = b
+  else
+    err = (a - b) / b
+  end if
+
+  if (abs (err) > EPS) call abort
+end subroutine check
+
+program e_54_3
+  ! Driver: compares the target-teams dot product against the serial
+  ! reference on 1024 * 1024 elements.
+  implicit none
+  real, external :: dotprod_ref, dotprod
+  real, pointer, dimension(:) :: B, C
+  real :: expected, got
+  integer :: n
+
+  n = 1024 * 1024
+  allocate (B(n))
+  allocate (C(n))
+  call init (B, C, n)
+
+  expected = dotprod_ref (B, C, n)
+  got = dotprod (B, C, n)
+  call check (expected, got)
+
+  deallocate (B)
+  deallocate (C)
+end program e_54_3
--- /dev/null
+! { dg-do run }
+
+function dotprod_ref (B, C, N) result (total)
+  ! Serial reference: returns the sum of B(k) * C(k) for k = 1..N,
+  ! accumulated in increasing k order.
+  implicit none
+  integer :: N
+  real :: B(N), C(N)
+  real :: total
+  integer :: k
+
+  total = 0.0e0
+  do k = 1, N
+    total = total + B(k) * C(k)
+  end do
+end function dotprod_ref
+
+function dotprod (B, C, n) result(sum)
+  ! Returns the sum of B(i) * C(i) for i = 1..N, computed in a target
+  ! region by 8 teams limited to 16 threads each, with explicit
+  ! dist_schedule and schedule chunk sizes.
+  !   B, C - operand arrays of length N
+  !   n    - number of elements
+  implicit none
+  integer :: N
+  real :: B(N), C(N), sum
+  integer :: i
+  sum = 0.0e0
+  ! sum is mapped tofrom explicitly: from OpenMP 4.5 on an unmapped scalar
+  ! is firstprivate in a target region and would not be copied back.
+  !$omp target map(to: B, C) map(tofrom: sum)
+    ! The reduction on teams combines the per-team partial sums; the one
+    ! on the loop below only combines the threads inside a team.
+    !$omp teams num_teams(8) thread_limit(16) reduction(+:sum)
+      !$omp distribute parallel do reduction(+:sum) &
+      !$omp& dist_schedule(static, 1024) schedule(static, 64)
+      do i = 1, N
+        sum = sum + B(i) * C(i)
+      end do
+    !$omp end teams
+  !$omp end target
+end function dotprod
+
+subroutine init (B, C, N)
+  ! Fills B(k) with 0.0001 * k and C(k) with 0.000001 * k * k for
+  ! k = 1..N.
+  integer :: N
+  real :: B(N), C(N)
+  integer :: k
+
+  do k = 1, N
+    B(k) = 0.0001 * k
+    C(k) = 0.000001 * k * k
+  end do
+end subroutine init
+
+subroutine check (a, b)
+  ! Aborts when a and b disagree by more than EPS.  The error measure is
+  ! (a - b) / b, falling back to a when b is zero and to b when only a
+  ! is zero.
+  real, parameter :: EPS = 0.0001
+  real :: a, b
+  real :: err
+
+  if (b == 0.0) then
+    err = a
+  else if (a == 0.0) then
+    err = b
+  else
+    err = (a - b) / b
+  end if
+
+  if (abs (err) > EPS) call abort
+end subroutine check
+
+program e_54_4
+  ! Driver: compares the scheduled teams dot product against the serial
+  ! reference on 1024 * 1024 elements.
+  implicit none
+  real, external :: dotprod_ref, dotprod
+  real, pointer, dimension(:) :: B, C
+  real :: expected, got
+  integer :: n
+
+  n = 1024 * 1024
+  allocate (B(n))
+  allocate (C(n))
+  call init (B, C, n)
+
+  expected = dotprod_ref (B, C, n)
+  got = dotprod (B, C, n)
+  call check (expected, got)
+
+  deallocate (B)
+  deallocate (C)
+end program e_54_4
--- /dev/null
+! { dg-do run }
+
+module e_54_5_mod
+  ! Element-wise vector product offloaded with target teams and
+  ! distribute simd, plus the routines that set up the operands and
+  ! verify the result.
+  implicit none
+contains
+  subroutine init (v1, v2, N)
+    ! Sets v1(k) = k + 2.0 and v2(k) = k - 3.0 for k = 1..N.
+    real, pointer, dimension(:) :: v1, v2
+    integer :: N
+    integer :: k
+
+    do k = 1, N
+      v1(k) = k + 2.0
+      v2(k) = k - 3.0
+    end do
+  end subroutine init
+
+  subroutine check (p, N)
+    ! Aborts if any p(k), k = 1..N, differs from (k + 2.0) * (k - 3.0)
+    ! by more than EPS in absolute value.
+    real, parameter :: EPS = 0.00001
+    real, pointer, dimension(:) :: p
+    integer :: N
+    integer :: k
+    real :: delta
+
+    do k = 1, N
+      delta = p(k) - (k + 2.0) * (k - 3.0)
+      if (abs (delta) > EPS) call abort
+    end do
+  end subroutine check
+
+  subroutine vec_mult (p, v1, v2, N)
+    ! Stores v1(k) * v2(k) into p(k) for k = 1..N inside a target teams
+    ! region; v1 and v2 are mapped to the device and p is mapped back.
+    integer :: N
+    real :: p(N), v1(N), v2(N)
+    integer :: k
+
+    !$omp target teams map(to: v1, v2) map(from: p)
+      !$omp distribute simd
+      do k = 1, N
+        p(k) = v1(k) * v2(k)
+      end do
+    !$omp end target teams
+  end subroutine vec_mult
+end module e_54_5_mod
+
+program e_54_5
+  ! Driver: initialises two 1000-element operands, multiplies them
+  ! through vec_mult and verifies the product with check.
+  use e_54_5_mod, only : init, check, vec_mult
+  implicit none
+  integer :: n
+  real, pointer, dimension(:) :: p, v1, v2
+
+  n = 1000
+  allocate (p(n))
+  allocate (v1(n))
+  allocate (v2(n))
+
+  call init (v1, v2, n)
+  call vec_mult (p, v1, v2, n)
+  call check (p, n)
+
+  deallocate (p)
+  deallocate (v1)
+  deallocate (v2)
+end program e_54_5
--- /dev/null
+! { dg-do run }
+
+module e_54_6_mod
+  ! Element-wise vector product offloaded with target teams and
+  ! distribute parallel do simd, plus the routines that set up the
+  ! operands and verify the result.
+  implicit none
+contains
+  subroutine init (v1, v2, N)
+    ! Sets v1(k) = k + 2.0 and v2(k) = k - 3.0 for k = 1..N.
+    real, pointer, dimension(:) :: v1, v2
+    integer :: N
+    integer :: k
+
+    do k = 1, N
+      v1(k) = k + 2.0
+      v2(k) = k - 3.0
+    end do
+  end subroutine init
+
+  subroutine check (p, N)
+    ! Aborts if any p(k), k = 1..N, differs from (k + 2.0) * (k - 3.0)
+    ! by more than EPS in absolute value.
+    real, parameter :: EPS = 0.00001
+    real, pointer, dimension(:) :: p
+    integer :: N
+    integer :: k
+    real :: delta
+
+    do k = 1, N
+      delta = p(k) - (k + 2.0) * (k - 3.0)
+      if (abs (delta) > EPS) call abort
+    end do
+  end subroutine check
+
+  subroutine vec_mult (p, v1, v2, N)
+    ! Stores v1(k) * v2(k) into p(k) for k = 1..N inside a target teams
+    ! region; v1 and v2 are mapped to the device and p is mapped back.
+    integer :: N
+    real :: p(N), v1(N), v2(N)
+    integer :: k
+
+    !$omp target teams map(to: v1, v2) map(from: p)
+      !$omp distribute parallel do simd
+      do k = 1, N
+        p(k) = v1(k) * v2(k)
+      end do
+    !$omp end target teams
+  end subroutine vec_mult
+end module e_54_6_mod
+
+program e_54_6
+  ! Driver: initialises two 1000-element operands, multiplies them
+  ! through vec_mult and verifies the product with check.
+  use e_54_6_mod, only : init, check, vec_mult
+  implicit none
+  integer :: n
+  real, pointer, dimension(:) :: p, v1, v2
+
+  n = 1000
+  allocate (p(n))
+  allocate (v1(n))
+  allocate (v2(n))
+
+  call init (v1, v2, n)
+  call vec_mult (p, v1, v2, n)
+  call check (p, n)
+
+  deallocate (p)
+  deallocate (v1)
+  deallocate (v2)
+end program e_54_6