+2014-11-13 Andrey Turetskiy <andrey.turetskiy@intel.com>
+ Ilya Verbin <ilya.verbin@intel.com>
+ Kirill Yukhin <kirill.yukhin@intel.com>
+ Ilya Tocar <ilya.tocar@intel.com>
+
+ * testsuite/lib/libgomp.exp
+ (check_effective_target_offload_device): New.
+ * testsuite/libgomp.c++/c++.exp: Include tests from subdirectories.
+ * testsuite/libgomp.c++/examples-4/e.51.5.C: New test.
+ * testsuite/libgomp.c++/examples-4/e.53.2.C: Ditto.
+ * testsuite/libgomp.c/examples-4/e.50.1.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.50.2.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.50.3.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.50.4.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.50.5.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.51.1.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.51.2.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.51.3.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.51.4.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.51.6.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.51.7.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.52.1.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.52.2.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.53.1.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.53.3.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.53.4.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.53.5.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.54.2.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.54.3.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.54.4.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.54.5.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.54.6.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.55.1.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.55.2.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.56.3.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.56.4.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.57.1.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.57.2.c: Ditto.
+ * testsuite/libgomp.c/examples-4/e.57.3.c: Ditto.
+ * testsuite/libgomp.c/target-7.c: Fix test.
+ * testsuite/libgomp.fortran/examples-4/e.50.1.f90: New test.
+ * testsuite/libgomp.fortran/examples-4/e.50.2.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.50.3.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.50.4.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.50.5.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.51.1.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.51.2.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.51.3.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.51.4.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.51.5.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.51.6.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.51.7.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.52.1.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.52.2.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.53.1.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.53.2.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.53.3.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.53.4.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.53.5.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.54.2.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.54.3.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.54.4.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.54.5.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.54.6.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.55.1.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.55.2.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.56.3.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.56.4.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.57.1.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.57.2.f90: Ditto.
+ * testsuite/libgomp.fortran/examples-4/e.57.3.f90: Ditto.
+
2014-11-13 Jakub Jelinek <jakub@redhat.com>
Ilya Verbin <ilya.verbin@intel.com>
Thomas Schwinge <thomas@codesourcery.com>
return 0
}
}
+
+# Return 1 if offload device is available.
+# The probe program below stores omp_is_initial_device () into 'a' from
+# inside an "omp target" region and uses 'a' as main's return value, so
+# the probe returns 0 only when the region did not run on the initial
+# (host) device.
+# NOTE(review): presumably check_runtime_nocache reports success when the
+# probe builds and exits with status 0 -- confirm against its definition.
+proc check_effective_target_offload_device { } {
+ return [check_runtime_nocache offload_device_available_ {
+ #include <omp.h>
+ int main ()
+ {
+ int a;
+ #pragma omp target map(from: a)
+ a = omp_is_initial_device ();
+ return a;
+ }
+ } ]
+}
if { $lang_test_file_found } {
# Gather a list of all tests.
- set tests [lsort [glob -nocomplain $srcdir/$subdir/*.C]]
+ set tests [lsort [find $srcdir/$subdir *.C]]
if { $blddir != "" } {
set ld_library_path "$always_ld_library_path:${blddir}/${lang_library_path}"
--- /dev/null
+// { dg-do run }
+
+#include <omp.h>
+
+#define EPS 0.000001
+#define N 1000
+
+extern "C" void abort (void);
+
+void init (float *a1, float *a2, int n)
+{
+ int s = -1;
+ for (int i = 0; i < n; i++)
+ {
+ a1[i] = s * 0.01;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void check (float *a, float *b, int n)
+{
+ for (int i = 0; i < n; i++)
+ if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
+ abort ();
+}
+
+void vec_mult_ref (float *&p, float *&v1, float *&v2, int n)
+{
+ for (int i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (float *&p, float *&v1, float *&v2, int n)
+{
+ #pragma omp target map(to: v1[0:n], v2[:n]) map(from: p[0:n])
+ #pragma omp parallel for
+ for (int i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+int main ()
+{
+ float *p = new float [N];
+ float *p1 = new float [N];
+ float *v1 = new float [N];
+ float *v2 = new float [N];
+
+ init (v1, v2, N);
+
+ vec_mult_ref (p, v1, v2, N);
+ vec_mult (p1, v1, v2, N);
+
+ check (p, p1, N);
+
+ delete [] p;
+ delete [] p1;
+ delete [] v1;
+ delete [] v2;
+
+ return 0;
+}
--- /dev/null
+// { dg-do run }
+// { dg-require-effective-target offload_device }
+
+#include <stdlib.h>
+
+struct typeX
+{
+ int a;
+};
+
+class typeY
+{
+public:
+ int foo () { return a^0x01; }
+ int a;
+};
+
+#pragma omp declare target
+struct typeX varX;
+class typeY varY;
+#pragma omp end declare target
+
+int main ()
+{
+ varX.a = 0;
+ varY.a = 0;
+
+ #pragma omp target
+ {
+ varX.a = 100;
+ varY.a = 100;
+ }
+
+ if (varX.a != 0 || varY.a != 0)
+ abort ();
+
+ #pragma omp target update from(varX, varY)
+
+ if (varX.a != 100 || varY.a != 100)
+ abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 100000
+
+void init (int *a1, int *a2)
+{
+ int i, s = -1;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void check (int *a, int *b)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void vec_mult_ref (int *p)
+{
+ int i;
+ int v1[N], v2[N];
+
+ init (v1, v2);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (int *p)
+{
+ int i;
+ int v1[N], v2[N];
+
+ init (v1, v2);
+
+ #pragma omp target map(p[0:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+/* Run the host reference and the target version and compare the results.
+ Both vec_mult_ref and vec_mult allocate and initialize their own v1/v2
+ inputs, so main only needs the two output arrays. */
+int main ()
+{
+ int p1[N], p2[N];
+
+ vec_mult_ref (p1);
+ vec_mult (p2);
+
+ check (p1, p2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 100000
+
+void init (char *a1, char *a2)
+{
+ char s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void check (char *a, char *b)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void vec_mult_ref (char *p)
+{
+ int i;
+ char v1[N], v2[N];
+
+ init (v1, v2);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (char *p)
+{
+ int i;
+ char v1[N], v2[N];
+
+ init (v1, v2);
+
+ #pragma omp target map(from: p[0:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+/* Run the host reference and the target version and compare the results.
+ Both vec_mult_ref and vec_mult allocate and initialize their own v1/v2
+ inputs, so main only needs the two output arrays. */
+int main ()
+{
+ char p1[N], p2[N];
+
+ vec_mult_ref (p1);
+ vec_mult (p2);
+
+ check (p1, p2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 100000
+
+void init (long long *a1, long long *a2)
+{
+ long long s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void check (long long *a, long long *b)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void vec_mult_ref (long long *p)
+{
+ int i;
+ long long v1[N], v2[N];
+
+ init (v1, v2);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (long long *p)
+{
+ int i;
+ long long v1[N], v2[N];
+
+ init (v1, v2);
+
+ #pragma omp target map(v1, v2, p[0:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+/* Run the host reference and the target version and compare the results.
+ Both vec_mult_ref and vec_mult allocate and initialize their own v1/v2
+ inputs, so main only needs the two output arrays. */
+int main ()
+{
+ long long p1[N], p2[N];
+
+ vec_mult_ref (p1);
+ vec_mult (p2);
+
+ check (p1, p2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.000001
+#define N 100000
+
+void init (double *a1, double *a2)
+{
+ double s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void check (double *a, double *b)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
+ abort ();
+}
+
+void vec_mult_ref (double *p, double *v1, double *v2)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (double *p, double *v1, double *v2)
+{
+ int i;
+ #pragma omp target map(to: v1[0:N], v2[:N]) map(from: p[0:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+int main ()
+{
+ double p1[N], p2[N];
+ double v1[N], v2[N];
+
+ init (v1, v2);
+
+ vec_mult_ref (p1, v1, v2);
+ vec_mult (p2, v1, v2);
+
+ check (p1, p2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#define EPS 0.000001
+#define N 100000
+#define THRESHOLD1 10000
+#define THRESHOLD2 1000
+
+void init (float *a1, float *a2)
+{
+ float s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void check (float *a, float *b)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
+ abort ();
+}
+
+void vec_mult_ref (float *p, float *v1, float *v2)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (float *p, float *v1, float *v2)
+{
+ int i;
+ #pragma omp target if(N > THRESHOLD1) map(to: v1[0:N], v2[:N]) \
+ map(from: p[0:N])
+ {
+ if (omp_is_initial_device ())
+ abort ();
+
+ #pragma omp parallel for if(N > THRESHOLD2)
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+ }
+}
+
+int main ()
+{
+ float p1[N], p2[N];
+ float v1[N], v2[N];
+
+ init (v1, v2);
+
+ vec_mult_ref (p1, v1, v2);
+ vec_mult (p2, v1, v2);
+
+ check (p1, p2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+const int MAX = 1800;
+
+void check (long long *a, long long *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void init (long long *a1, long long *a2, int N)
+{
+ long long s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void vec_mult_ref (long long *p, long long *v1, long long *v2, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (long long *p, long long *v1, long long *v2, int N)
+{
+ int i;
+ #pragma omp target data map(to: v1[0:N], v2[:N]) map(from: p[0:N])
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+int main ()
+{
+ long long *p1 = (long long *) malloc (MAX * sizeof (long long));
+ long long *p2 = (long long *) malloc (MAX * sizeof (long long));
+ long long *v1 = (long long *) malloc (MAX * sizeof (long long));
+ long long *v2 = (long long *) malloc (MAX * sizeof (long long));
+
+ init (v1, v2, MAX);
+
+ vec_mult_ref (p1, v1, v2, MAX);
+ vec_mult (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+const int MAX = 1800;
+
+void check (char *a, char *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void init (char *a1, char *a2, int N)
+{
+ char s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void init_again (char *a1, char *a2, int N)
+{
+ char s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s * 10;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void vec_mult_ref (char *p, char *v1, char *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ init_again (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+}
+
+void vec_mult (char *p, char *v1, char *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ #pragma omp target data map(from: p[0:N])
+ {
+ #pragma omp target map(to: v1[:N], v2[:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ init_again (v1, v2, N);
+
+ #pragma omp target map(to: v1[:N], v2[:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+ }
+}
+
+int main ()
+{
+ char *p1 = (char *) malloc (MAX * sizeof (char));
+ char *p2 = (char *) malloc (MAX * sizeof (char));
+ char *v1 = (char *) malloc (MAX * sizeof (char));
+ char *v2 = (char *) malloc (MAX * sizeof (char));
+
+ vec_mult_ref (p1, v1, v2, MAX);
+ vec_mult (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+const int ROWS = 5;
+const int COLS = 5;
+
+void init (int Q[][COLS], const int rows, const int cols)
+{
+ int i, j;
+ for (i = 0; i < rows; i++)
+ for (j = 0; j < cols; j++)
+ Q[i][j] = (i + 1) * 100 + (j + 1);
+}
+
+void check (int a[][COLS], int b[][COLS], const int rows, const int cols)
+{
+ int i, j;
+ for (i = 0; i < rows; i++)
+ for (j = 0; j < cols; j++)
+ if (a[i][j] != b[i][j])
+ abort ();
+}
+
+void gramSchmidt_ref (int Q[][COLS], const int rows, const int cols)
+{
+ int i, k;
+
+ for (k = 0; k < cols; k++)
+ {
+ int tmp = 0;
+
+ for (i = 0; i < rows; i++)
+ tmp += (Q[i][k] * Q[i][k]);
+
+ for (i = 0; i < rows; i++)
+ Q[i][k] *= tmp;
+ }
+}
+
+void gramSchmidt (int Q[][COLS], const int rows, const int cols)
+{
+ int i, k;
+
+ #pragma omp target data map(Q[0:rows][0:cols]) map(to:COLS)
+ for (k = 0; k < cols; k++)
+ {
+ int tmp = 0;
+
+ #pragma omp target
+ #pragma omp parallel for reduction(+:tmp)
+ for (i = 0; i < rows; i++)
+ tmp += (Q[i][k] * Q[i][k]);
+
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < rows; i++)
+ Q[i][k] *= tmp;
+ }
+}
+
+int main ()
+{
+ int (*Q1)[COLS] = (int(*)[COLS]) malloc (ROWS * COLS * sizeof (int));
+ int (*Q2)[COLS] = (int(*)[COLS]) malloc (ROWS * COLS * sizeof (int));
+
+ init (Q1, ROWS, COLS);
+ init (Q2, ROWS, COLS);
+
+ gramSchmidt_ref (Q1, ROWS, COLS);
+ gramSchmidt (Q2, ROWS, COLS);
+
+ check (Q1, Q2, ROWS, COLS);
+
+ free (Q1);
+ free (Q2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.000001
+
+const int MAX = 1800;
+
+void check (double *a, double *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
+ abort ();
+}
+
+void init (double *a1, double *a2, int N)
+{
+ double s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void vec_mult_ref (double *p1, double *v3, double *v4, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p1[i] = v3[i] * v4[i];
+}
+
+void foo_ref (double *p0, double *v1, double *v2, int N)
+{
+ init (v1, v2, N);
+ vec_mult_ref (p0, v1, v2, N);
+}
+
+void vec_mult (double *p1, double *v3, double *v4, int N)
+{
+ int i;
+ #pragma omp target map(to: v3[0:N], v4[:N]) map(from: p1[0:N])
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p1[i] = v3[i] * v4[i];
+}
+
+void foo (double *p0, double *v1, double *v2, int N)
+{
+ init (v1, v2, N);
+
+ #pragma omp target data map(to: v1[0:N], v2[:N]) map(from: p0[0:N])
+ vec_mult (p0, v1, v2, N);
+}
+
+int main ()
+{
+ double *p1 = (double *) malloc (MAX * sizeof (double));
+ double *p2 = (double *) malloc (MAX * sizeof (double));
+ double *v1 = (double *) malloc (MAX * sizeof (double));
+ double *v2 = (double *) malloc (MAX * sizeof (double));
+
+ foo_ref (p1, v1, v2, MAX);
+ foo (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <stdlib.h>
+#include <omp.h>
+
+#define EPS 0.000001
+#define THRESHOLD 1000
+
+const int MAX = 1800;
+
+void check (float *a, float *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] - b[i] > EPS || b[i] - a[i] > EPS)
+ abort ();
+}
+
+void init (float *a1, float *a2, int N)
+{
+ float s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void init_again (float *a1, float *a2, int N)
+{
+ float s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s * 10;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void vec_mult_ref (float *p, float *v1, float *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ init_again (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+}
+
+/* Offloaded counterpart of vec_mult_ref.
+ Fills v1/v2 with init, stores v1[i] * v2[i] into p, refills v1/v2 with
+ init_again and adds the new products to p.
+ p is mapped 'from' by the enclosing target data region; v1 and v2 are
+ mapped 'to' on each target region, i.e. after each (re)initialization.
+ Every construct carries if (N > THRESHOLD), and each target region
+ aborts when omp_is_initial_device () reports that it is running on the
+ initial device. */
+void vec_mult (float *p, float *v1, float *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ #pragma omp target data if(N > THRESHOLD) map(from: p[0:N])
+ {
+ #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
+ {
+ /* Must be a call: a bare 'abort;' is an expression statement that
+ does nothing, which would silently disable this check. */
+ if (omp_is_initial_device ())
+ abort ();
+
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+ }
+
+ init_again (v1, v2, N);
+
+ #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
+ {
+ if (omp_is_initial_device ())
+ abort ();
+
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+ }
+ }
+}
+
+int main ()
+{
+ float *p1 = (float *) malloc (MAX * sizeof (float));
+ float *p2 = (float *) malloc (MAX * sizeof (float));
+ float *v1 = (float *) malloc (MAX * sizeof (float));
+ float *v2 = (float *) malloc (MAX * sizeof (float));
+
+ vec_mult_ref (p1, v1, v2, MAX);
+ vec_mult (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <stdlib.h>
+#include <omp.h>
+
+#define THRESHOLD 1000
+
+const int MAX = 1800;
+
+void check (short *a, short *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void init (short *a1, short *a2, int N)
+{
+ short s = -1;
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void vec_mult_ref (short *p, short *v1, short *v2, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (short *p, short *v1, short *v2, int N)
+{
+ int i;
+ #pragma omp target data map(from: p[0:N])
+ #pragma omp target if (N > THRESHOLD) map(to: v1[:N], v2[:N])
+ {
+ if (omp_is_initial_device ())
+ abort ();
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+ }
+}
+
+int main ()
+{
+ short *p1 = (short *) malloc (MAX * sizeof (short));
+ short *p2 = (short *) malloc (MAX * sizeof (short));
+ short *v1 = (short *) malloc (MAX * sizeof (short));
+ short *v2 = (short *) malloc (MAX * sizeof (short));
+
+ init (v1, v2, MAX);
+
+ vec_mult_ref (p1, v1, v2, MAX);
+ vec_mult (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+const int MAX = 1800;
+
+void check (int *a, int *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void init (int *a1, int *a2, int N)
+{
+ int i, s = -1;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+void init_again (int *a1, int *a2, int N)
+{
+ int i, s = -1;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s * 10;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+/* Host reference: p[i] receives v1[i] * v2[i] computed after init, plus
+ v1[i] * v2[i] computed again after init_again.
+ p, v1 and v2 must each hold at least N elements; v1 and v2 are
+ overwritten. */
+void vec_mult_ref (int *p, int *v1, int *v2, int N)
+{
+ int i;
+
+ /* Initialize exactly N elements (not the global MAX) so the function
+ never writes past buffers sized for its own N argument. */
+ init (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ init_again (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+}
+
+/* Offloaded counterpart of vec_mult_ref.
+ v1/v2 are mapped 'to' and p 'from' once by the target data region.
+ After the host refills v1/v2 with init_again, a 'target update to'
+ directive is issued for them before the second target region adds the
+ new products to p.
+ p, v1 and v2 must each hold at least N elements; v1 and v2 are
+ overwritten. */
+void vec_mult (int *p, int *v1, int *v2, int N)
+{
+ int i;
+
+ /* Initialize exactly N elements (not the global MAX) so the function
+ never writes past buffers sized for its own N argument. */
+ init (v1, v2, N);
+
+ #pragma omp target data map(to: v1[:N], v2[:N]) map(from: p[0:N])
+ {
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ init_again (v1, v2, N);
+
+ #pragma omp target update to(v1[:N], v2[:N])
+
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+ }
+}
+
+int main ()
+{
+ int *p1 = (int *) malloc (MAX * sizeof (int));
+ int *p2 = (int *) malloc (MAX * sizeof (int));
+ int *v1 = (int *) malloc (MAX * sizeof (int));
+ int *v2 = (int *) malloc (MAX * sizeof (int));
+
+ vec_mult_ref (p1, v1, v2, MAX);
+ vec_mult (p2, v1, v2, MAX);
+
+ check (p1, p2, MAX);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+const int MAX = 1800;
+
+void check (int *a, int *b, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (a[i] != b[i])
+ abort ();
+}
+
+void init (int *a1, int *a2, int N)
+{
+ int i, s = -1;
+ for (i = 0; i < N; i++)
+ {
+ a1[i] = s;
+ a2[i] = i;
+ s = -s;
+ }
+}
+
+int maybe_init_again (int *a, int N)
+{
+ int i;
+ for (i = 0; i < N; i++)
+ a[i] = i;
+ return 1;
+}
+
+void vec_mult_ref (int *p, int *v1, int *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ maybe_init_again (v1, N);
+ maybe_init_again (v2, N);
+
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+}
+
+void vec_mult (int *p, int *v1, int *v2, int N)
+{
+ int i;
+
+ init (v1, v2, N);
+
+ #pragma omp target data map(to: v1[:N], v2[:N]) map(from: p[0:N])
+ {
+ int changed;
+
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = v1[i] * v2[i];
+
+ changed = maybe_init_again (v1, N);
+ #pragma omp target update if (changed) to(v1[:N])
+
+ changed = maybe_init_again (v2, N);
+ #pragma omp target update if (changed) to(v2[:N])
+
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p[i] = p[i] + (v1[i] * v2[i]);
+ }
+}
+
+int main ()
+{
+ int *p = (int *) malloc (MAX * sizeof (int));
+ int *p1 = (int *) malloc (MAX * sizeof (int));
+ int *v1 = (int *) malloc (MAX * sizeof (int));
+ int *v2 = (int *) malloc (MAX * sizeof (int));
+
+ vec_mult_ref (p, v1, v2, MAX);
+ vec_mult (p1, v1, v2, MAX);
+
+ check (p, p1, MAX);
+
+ free (p);
+ free (p1);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define THRESHOLD 20
+
+#pragma omp declare target
+int fib (int n)
+{
+ if (n <= 0)
+ return 0;
+ else if (n == 1)
+ return 1;
+ else
+ return fib (n - 1) + fib (n - 2);
+}
+#pragma omp end declare target
+
+/* Return fib (n), evaluated inside an 'omp target' region whose if clause
+ is n > THRESHOLD. x starts at 0 and is assigned inside the region. */
+int fib_wrapper (int n)
+{
+ int x = 0;
+
+ #pragma omp target if(n > THRESHOLD)
+ x = fib (n);
+
+ return x;
+}
+
+/* Compare a direct fib call with fib_wrapper for one argument below
+ THRESHOLD (15) and one above it (25), so both outcomes of the wrapper's
+ if clause are exercised; abort on any mismatch. */
+int main ()
+{
+ if (fib (15) != fib_wrapper (15))
+ abort ();
+ if (fib (25) != fib_wrapper (25))
+ abort ();
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.000001
+#define N 100000
+
+#pragma omp declare target
+float p1[N], p2[N], v1[N], v2[N];
+#pragma omp end declare target
+
+void init ()
+{
+ int i, s = -1;
+ for (i = 0; i < N; i++)
+ {
+ v1[i] = s * 0.01;
+ v2[i] = i;
+ s = -s;
+ }
+}
+
+void check ()
+{
+ int i;
+ for (i = 0; i < N; i++)
+ if (p1[i] - p2[i] > EPS || p2[i] - p1[i] > EPS)
+ abort ();
+}
+
+void vec_mult_ref ()
+{
+ int i;
+ for (i = 0; i < N; i++)
+ p1[i] = v1[i] * v2[i];
+}
+
+void vec_mult ()
+{
+ int i;
+
+ #pragma omp target update to(v1, v2)
+
+ #pragma omp target
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ p2[i] = v1[i] * v2[i];
+
+ #pragma omp target update from(p2)
+}
+
+int main ()
+{
+ init ();
+
+ vec_mult_ref ();
+ vec_mult ();
+
+ check ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 1000
+
+#pragma omp declare target
+float Q[N][N];
+float Pfun (const int i, const int k)
+{
+ return Q[i][k] * Q[k][i];
+}
+#pragma omp end declare target
+
+void init ()
+{
+ int i, j;
+ for (i = 0; i < N; i++)
+ for (j = 0; j < N; j++)
+ Q[i][j] = 0.001 * i * j;
+}
+
+float accum_ref (int k)
+{
+ int i;
+ float tmp = 0.0;
+
+ for (i = 0; i < N; i++)
+ tmp += Pfun (i, k);
+
+ return tmp;
+}
+
+float accum (int k)
+{
+ int i;
+ float tmp = 0.0;
+
+ #pragma omp target
+ #pragma omp parallel for reduction(+:tmp)
+ for (i = 0; i < N; i++)
+ tmp += Pfun (i, k);
+
+ return tmp;
+}
+
+void check (float a, float b)
+{
+ float err = (b == 0.0) ? a : (a - b) / b;
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+}
+
+int main ()
+{
+ int i;
+
+ init ();
+
+ #pragma omp target update to(Q)
+
+ for (i = 0; i < N; i++)
+ check (accum (i), accum_ref (i));
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-msse2" { target sse2_runtime } } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 10000
+#define M 1024
+
+#pragma omp declare target
+float Q[N][N];
+#pragma omp declare simd uniform(i) linear(k) notinbranch
+float Pfun (const int i, const int k)
+{
+ return Q[i][k] * Q[k][i];
+}
+#pragma omp end declare target
+
+void init ()
+{
+ int i, j;
+ for (i = 0; i < N; i++)
+ for (j = 0; j < N; j++)
+ Q[i][j] = 0.001 * i * j;
+}
+
+float accum_ref ()
+{
+ int i, k;
+ float tmp = 0.0;
+
+ for (i = 0; i < N; i++)
+ {
+ float tmp1 = 0.0;
+
+ for (k = 0; k < M; k++)
+ tmp1 += Pfun(i,k);
+
+ tmp += tmp1;
+ }
+
+ return tmp;
+}
+
+float accum ()
+{
+ int i, k;
+ float tmp = 0.0;
+
+ #pragma omp target
+ #pragma omp parallel for reduction(+:tmp)
+ for (i = 0; i < N; i++)
+ {
+ float tmp1 = 0.0;
+
+ #pragma omp simd reduction(+:tmp1)
+ for (k = 0; k < M; k++)
+ tmp1 += Pfun(i,k);
+
+ tmp += tmp1;
+ }
+
+ return tmp;
+}
+
+void check (float a, float b)
+{
+ float err = (b == 0.0) ? a : (a - b) / b;
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+}
+
+int main ()
+{
+ init ();
+
+ #pragma omp target update to(Q)
+
+ check (accum (), accum_ref ());
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.0001
+#define N 1024*1024
+
+void init (float B[], float C[], int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ B[i] = 0.1 * i;
+ C[i] = 0.01 * i * i;
+ }
+}
+
+float dotprod_ref (float B[], float C[], int n)
+{
+ int i;
+ float sum = 0.0;
+
+ for (i = 0; i < n; i++)
+ sum += B[i] * C[i];
+
+ return sum;
+}
+
+float dotprod (float B[], float C[], int n, int block_size,
+ int num_teams, int block_threads)
+{
+ int i, i0;
+ float sum = 0;
+
+ #pragma omp target map(to: B[0:n], C[0:n])
+ #pragma omp teams num_teams(num_teams) thread_limit(block_threads) \
+ reduction(+:sum)
+ #pragma omp distribute
+ for (i0 = 0; i0 < n; i0 += block_size)
+ #pragma omp parallel for reduction(+:sum)
+ for (i = i0; i < ((i0 + block_size > n) ? n : i0 + block_size); i++)
+ sum += B[i] * C[i];
+
+ return sum;
+}
+
+void check (float a, float b)
+{
+ float err = (b == 0.0) ? a : (a - b) / b;
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+}
+
+int main ()
+{
+ float *v1 = (float *) malloc (N * sizeof (float));
+ float *v2 = (float *) malloc (N * sizeof (float));
+
+ float p1, p2;
+
+ init (v1, v2, N);
+
+ p1 = dotprod_ref (v1, v2, N);
+ p2 = dotprod (v1, v2, N, 32, 2, 8);
+
+ check (p1, p2);
+
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.0001
+#define N 1024*1024
+
+void init (float B[], float C[], int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ B[i] = 0.1 * i;
+ C[i] = 0.01 * i * i;
+ }
+}
+
+float dotprod_ref (float B[], float C[], int n)
+{
+ int i;
+ float sum = 0.0;
+
+ for (i = 0; i < n; i++)
+ sum += B[i] * C[i];
+
+ return sum;
+}
+
+float dotprod (float B[], float C[], int n)
+{
+ int i;
+ float sum = 0;
+
+ #pragma omp target teams map(to: B[0:n], C[0:n])
+ #pragma omp distribute parallel for reduction(+:sum)
+ for (i = 0; i < n; i++)
+ sum += B[i] * C[i];
+
+ return sum;
+}
+
+void check (float a, float b)
+{
+ float err = (b == 0.0) ? a : (a - b) / b;
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+}
+
+int main ()
+{
+ float *v1 = (float *) malloc (N * sizeof (float));
+ float *v2 = (float *) malloc (N * sizeof (float));
+
+ float p1, p2;
+
+ init (v1, v2, N);
+
+ p1 = dotprod_ref (v1, v2, N);
+ p2 = dotprod (v1, v2, N);
+
+ check (p1, p2);
+
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.0001
+#define N 1024*1024
+
+void init (float B[], float C[], int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ B[i] = 0.1 * i;
+ C[i] = 0.01 * i * i;
+ }
+}
+
+float dotprod_ref (float B[], float C[], int n)
+{
+ int i;
+ float sum = 0.0;
+
+ for (i = 0; i < n; i++)
+ sum += B[i] * C[i];
+
+ return sum;
+}
+
+float dotprod (float B[], float C[], int n)
+{
+ int i;
+ float sum = 0;
+
+ #pragma omp target map(to: B[0:n], C[0:n])
+ #pragma omp teams num_teams(8) thread_limit(16)
+ #pragma omp distribute parallel for reduction(+:sum) \
+ dist_schedule(static, 1024) \
+ schedule(static, 64)
+ for (i = 0; i < n; i++)
+ sum += B[i] * C[i];
+
+ return sum;
+}
+
+void check (float a, float b)
+{
+ float err = (b == 0.0) ? a : (a - b) / b;
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+}
+
+int main ()
+{
+ float *v1 = (float *) malloc (N * sizeof (float));
+ float *v2 = (float *) malloc (N * sizeof (float));
+
+ float p1, p2;
+
+ init (v1, v2, N);
+
+ p1 = dotprod_ref (v1, v2, N);
+ p2 = dotprod (v1, v2, N);
+
+ check (p1, p2);
+
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 10000
+
+void init (float *a, float *b, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i] = 0.1 * i;
+ b[i] = 0.01 * i * i;
+ }
+}
+
+void vec_mult_ref (float *p, float *v1, float *v2, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (float *p, float *v1, float *v2, int n)
+{
+ int i;
+ #pragma omp target teams map(to: v1[0:n], v2[:n]) map(from: p[0:n])
+ #pragma omp distribute simd
+ for (i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void check (float *a, float *b, int n)
+{
+ int i;
+ for (i = 0 ; i < n ; i++)
+ {
+ float err = (a[i] == 0.0) ? b[i] : (b[i] - a[i]) / a[i];
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+ }
+}
+
+int main ()
+{
+ float *p1 = (float *) malloc (N * sizeof (float));
+ float *p2 = (float *) malloc (N * sizeof (float));
+ float *v1 = (float *) malloc (N * sizeof (float));
+ float *v2 = (float *) malloc (N * sizeof (float));
+
+ init (v1, v2, N);
+
+ vec_mult_ref (p1, v1, v2, N);
+ vec_mult (p2, v1, v2, N);
+
+ check (p1, p2, N);
+
+ free (p1);
+ free (p2);
+ free (v1);
+ free (v2);
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 10000
+
+void init (float *a, float *b, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i] = 0.1 * i;
+ b[i] = 0.01 * i * i;
+ }
+}
+
+void vec_mult_ref (float *p, float *v1, float *v2, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void vec_mult (float *p, float *v1, float *v2, int n)
+{
+ int i;
+ #pragma omp target teams map(to: v1[0:n], v2[:n]) map(from: p[0:n])
+ #pragma omp distribute parallel for simd
+ for (i = 0; i < n; i++)
+ p[i] = v1[i] * v2[i];
+}
+
+void check (float *a, float *b, int n)
+{
+ int i;
+ for (i = 0 ; i < n ; i++)
+ {
+ float err = (a[i] == 0.0) ? b[i] : (b[i] - a[i]) / a[i];
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+ }
+}
+
+/* Run the reference and the offloaded multiply on identical inputs and
+   let check abort on any mismatch.  */
+int main ()
+{
+  /* p1: reference result, p2: offloaded result, v1/v2: inputs.  */
+  float *p1 = malloc (N * sizeof *p1);
+  float *p2 = malloc (N * sizeof *p2);
+  float *v1 = malloc (N * sizeof *v1);
+  float *v2 = malloc (N * sizeof *v2);
+
+  init (v1, v2, N);
+  vec_mult_ref (p1, v1, v2, N);
+  vec_mult (p2, v1, v2, N);
+  check (p1, p2, N);
+
+  free (p1);
+  free (p2);
+  free (v1);
+  free (v2);
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 100000
+#define CHUNKSZ 1000
+
+float Y[N];
+float Z[N];
+
+/* F returns the negation of its argument.  It sits inside a
+   "declare target" region and is called from the target region in
+   pipedF as well as from host code.  */
+#pragma omp declare target
+float F (float a)
+{
+ return -a;
+}
+#pragma omp end declare target
+
+/* Host-side reference: replace every element of Y with F of itself.  */
+void pipedF_ref ()
+{
+  int idx = 0;
+  while (idx < N)
+    {
+      Y[idx] = F (Y[idx]);
+      idx++;
+    }
+}
+
+/* Apply F to Z in place, one CHUNKSZ-element slice per task.  Each
+   iteration of the outer loop creates a task whose target region maps
+   only Z[C:CHUNKSZ]; the taskwait after the loop waits for those
+   tasks.  */
+void pipedF ()
+{
+ int i, C;
+ for (C = 0; C < N; C += CHUNKSZ)
+ {
+ #pragma omp task
+ #pragma omp target map(Z[C:CHUNKSZ])
+ #pragma omp parallel for
+ for (i = C; i < C + CHUNKSZ; i++)
+ Z[i] = F (Z[i]);
+ }
+ #pragma omp taskwait
+}
+
+/* Give Y and Z the same contents: element i holds 0.1 * i.  */
+void init ()
+{
+  int idx = 0;
+  while (idx < N)
+    {
+      Z[idx] = 0.1 * idx;
+      Y[idx] = Z[idx];
+      idx++;
+    }
+}
+
+void check ()
+{
+ int i;
+ for (i = 0; i < N; i++)
+ {
+ float err = (Z[i] == 0.0) ? Y[i] : (Y[i] - Z[i]) / Z[i];
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+ }
+}
+
+/* Initialise Y and Z identically, transform Y on the host and Z through
+   the task/target pipeline, then compare the two arrays.  */
+int main ()
+{
+ init ();
+
+ pipedF_ref ();
+ pipedF ();
+
+ check ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#define EPS 0.00001
+#define N 10000
+
+/* init fills a[i] = 0.1 * i and b[i] = 0.01 * i * i for i in [0, n).
+   It is inside a "declare target" region because vec_mult calls it
+   from within a target region; vec_mult_ref calls it on the host.  */
+#pragma omp declare target
+void init (float *a, float *b, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i] = 0.1 * i;
+ b[i] = 0.01 * i * i;
+ }
+}
+#pragma omp end declare target
+
+/* Host-side reference.  The incoming V1/V2 values are ignored: both
+   parameters are overwritten with freshly allocated N-element buffers,
+   which are initialised, multiplied into P and released again.  */
+void vec_mult_ref (float *p, float *v1, float *v2, int n)
+{
+  int idx;
+
+  v1 = malloc (n * sizeof *v1);
+  v2 = malloc (n * sizeof *v2);
+  init (v1, v2, n);
+
+  idx = 0;
+  while (idx < n)
+    {
+      p[idx] = v1[idx] * v2[idx];
+      idx++;
+    }
+
+  free (v1);
+  free (v2);
+}
+
+/* Offloaded counterpart of vec_mult_ref, split into two tasks ordered
+   by a depend clause on v1 and v2.  The incoming V1/V2 values are
+   ignored; the first target region overwrites both pointers.  Each
+   target region aborts if it finds itself on the initial device.  */
+void vec_mult (float *p, float *v1, float *v2, int n)
+{
+  int i;
+
+  /* Producer task: allocate and initialise both inputs inside the
+     target region.  */
+  #pragma omp task shared(v1, v2) depend(out: v1, v2)
+    #pragma omp target map(v1, v2)
+      {
+        if (omp_is_initial_device ())
+          abort ();
+
+        v1 = (float *) malloc (n * sizeof (float));
+        v2 = (float *) malloc (n * sizeof (float));
+
+        init (v1, v2, n);
+      }
+
+  /* Consumer task: multiply into p, then release the buffers that the
+     producer allocated.  */
+  #pragma omp task shared(v1, v2) depend(in: v1, v2)
+    #pragma omp target map(to: v1, v2) map(from: p[0:n])
+      {
+        if (omp_is_initial_device ())
+          abort ();
+
+        #pragma omp parallel for
+          for (i = 0; i < n; i++)
+            p[i] = v1[i] * v2[i];
+
+        free (v1);
+        free (v2);
+      }
+
+  /* Both tasks share this function's own v1/v2 and the caller reads p
+     right after the call, so do not return until they have finished.  */
+  #pragma omp taskwait
+}
+
+void check (float *a, float *b, int n)
+{
+ int i;
+ for (i = 0 ; i < n ; i++)
+ {
+ float err = (a[i] == 0.0) ? b[i] : (b[i] - a[i]) / a[i];
+ if (((err > 0) ? err : -err) > EPS)
+ abort ();
+ }
+}
+
+/* Compare the host reference against the task/target implementation.
+   v1 and v2 are placeholders only: both callees overwrite their copies
+   before use.  They are still passed by value, so give them a defined
+   value instead of reading indeterminate pointers.  */
+int main ()
+{
+  float *p1 = (float *) malloc (N * sizeof (float));
+  float *p2 = (float *) malloc (N * sizeof (float));
+  float *v1 = NULL, *v2 = NULL;
+
+  vec_mult_ref (p1, v1, v2, N);
+  vec_mult (p2, v1, v2, N);
+
+  check (p1, p2, N);
+
+  free (p1);
+  free (p2);
+
+  return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+/* The enclosing data region maps A[0:4]; the target region maps the
+   section p[7:20] through the pointer p (which points at A[0]) and
+   lists A[0:4] again.  One element is written through each name, and
+   the host then requires both stores to be visible in A.  */
+void foo ()
+{
+ int A[30], *p;
+ #pragma omp target data map(A[0:4])
+ {
+ p = &A[0];
+ #pragma omp target map(p[7:20]) map(A[0:4])
+ {
+ A[2] = 777;
+ p[8] = 777;
+ }
+ }
+
+ if (A[2] != 777 || A[8] != 777)
+ abort ();
+}
+
+/* Test entry point: foo aborts on failure.  */
+int main ()
+{
+ foo ();
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+/* Here the data region maps A[0:10] and the target region maps p[3:7]
+   (p points at A[0]) together with A[0:10].  A[8] is stored first
+   through the array and then through p[8]; the host check requires the
+   later value, 999, to be the one visible in A[8].  */
+void foo ()
+{
+ int A[30], *p;
+ #pragma omp target data map(A[0:10])
+ {
+ p = &A[0];
+ #pragma omp target map(p[3:7]) map(A[0:10])
+ {
+ A[2] = 777;
+ A[8] = 777;
+ p[8] = 999;
+ }
+ }
+
+ if (A[2] != 777 || A[8] != 999)
+ abort ();
+}
+
+/* Test entry point: foo aborts on failure.  */
+int main ()
+{
+ foo ();
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+/* Exercise the if clause on "target" and "target data" with the same
+   condition (a > 200 && a < 400) for a = 100, 300 and 500.  In each
+   round c records omp_is_initial_device () from the conditional target
+   region, and d records it from an unconditional target region nested
+   in the conditional data region, which also stores 100 into b.  */
+int main ()
+{
+ int a = 100;
+ int b = 0;
+ int c, d;
+
+ /* Round 1: a == 100, so the if condition is false.  */
+ #pragma omp target if(a > 200 && a < 400)
+ c = omp_is_initial_device ();
+
+ #pragma omp target data map(to: b) if(a > 200 && a < 400)
+ #pragma omp target
+ {
+ b = 100;
+ d = omp_is_initial_device ();
+ }
+
+ /* Required: b == 100 on the host, c nonzero, d zero.  */
+ if (b != 100 || !c || d)
+ abort ();
+
+ /* Round 2: a == 300, so the if condition is true.  */
+ a += 200;
+ b = 0;
+
+ #pragma omp target if(a > 200 && a < 400)
+ c = omp_is_initial_device ();
+
+ #pragma omp target data map(to: b) if(a > 200 && a < 400)
+ #pragma omp target
+ {
+ b = 100;
+ d = omp_is_initial_device ();
+ }
+
+ /* Required: the host b is still 0, and both c and d are zero.  */
+ if (b != 0 || c || d)
+ abort ();
+
+ /* Round 3: a == 500, so the if condition is false again.  */
+ a += 200;
+ b = 0;
+
+ #pragma omp target if(a > 200 && a < 400)
+ c = omp_is_initial_device ();
+
+ #pragma omp target data map(to: b) if(a > 200 && a < 400)
+ #pragma omp target
+ {
+ b = 100;
+ d = omp_is_initial_device ();
+ }
+
+ /* Required: same outcome as round 1.  */
+ if (b != 100 || !c || d)
+ abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#define N 10
+
+/* Launch N target regions, iteration i asking for device(i), and
+   record in offload[i] whether the region ran on the initial device.
+   With num = omp_get_num_devices (), entries below num must be zero
+   and entries from num upwards must be nonzero.  */
+int main ()
+{
+ int i;
+ int offload[N];
+ int num = omp_get_num_devices();
+
+ #pragma omp parallel for
+ for (i = 0; i < N; i++)
+ #pragma omp target device(i) map(from: offload[i:1])
+ offload[i] = omp_is_initial_device ();
+
+ for (i = 0; i < num; i++)
+ if (offload[i])
+ abort ();
+
+ for (i = num; i < N; i++)
+ if (!offload[i])
+ abort ();
+
+ return 0;
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-require-effective-target offload_device } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+/* A target region using the default device must not report the initial
+   device.  After the default device number is set to
+   omp_get_num_devices (), the same region must report the initial
+   device.  */
+int main ()
+{
+  int res;
+
+  #pragma omp target
+    res = omp_is_initial_device ();
+
+  if (res)
+    abort ();
+
+  omp_set_default_device (omp_get_num_devices ());
+
+  #pragma omp target
+    res = omp_is_initial_device ();
+
+  if (!res)
+    abort ();
+
+  return 0;
+}
if (omp_get_level () != 0 || !omp_is_initial_device ())
abort ();
#pragma omp target if (v <= 1)
- if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
+ if (omp_get_level () != 0)
abort ();
#pragma omp target device (d) if (v <= 1)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
if (omp_get_level () != 0 || !omp_is_initial_device ())
abort ();
#pragma omp target if (1)
- if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
+ if (omp_get_level () != 0)
abort ();
#pragma omp target device (d) if (1)
if (omp_get_level () != 0 || (f && !omp_is_initial_device ()))
#pragma omp target data if (v <= 1) map (to: h)
{
#pragma omp target if (v <= 1)
- if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 8)
+ if (omp_get_level () != 0 || h++ != 8)
abort ();
#pragma omp target update if (v <= 1) from (h)
}
#pragma omp target data if (1) map (to: h)
{
#pragma omp target if (1)
- if (omp_get_level () != 0 || (f && !omp_is_initial_device ()) || h++ != 12)
+ if (omp_get_level () != 0 || h++ != 12)
abort ();
#pragma omp target update if (1) from (h)
}
--- /dev/null
+! { dg-do run }
+
+! Vector multiply inside a bare "target" region with no map clauses.
+module e_50_1_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Initialise local arrays, multiply them in a target region and verify.
+ subroutine vec_mult (N)
+ integer :: i, N
+ real :: p(N), v1(N), v2(N)
+ call init (v1, v2, N)
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call check (p, N)
+ end subroutine
+
+end module
+
+! Driver: run vec_mult on 1000 elements; it aborts on a mismatch.
+program e_50_1
+ use e_50_1_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+! Vector multiply in a "target" region with a map clause that has no
+! map type: map(v1,v2,p).
+module e_50_2_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Initialise local arrays, multiply them in a target region and verify.
+ subroutine vec_mult (N)
+ integer :: i, N
+ real :: p(N), v1(N), v2(N)
+ call init (v1, v2, N)
+ !$omp target map(v1,v2,p)
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call check (p, N)
+ end subroutine
+end module
+
+! Driver: run vec_mult on 1000 elements; it aborts on a mismatch.
+program e_50_2
+ use e_50_2_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+! Vector multiply in a "target" region with explicit map types:
+! the inputs are mapped "to", the result is mapped "from".
+module e_50_3_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Initialise local arrays, multiply them in a target region and verify.
+ subroutine vec_mult (N)
+ integer :: i, N
+ real :: p(N), v1(N), v2(N)
+ call init (v1, v2, N)
+ !$omp target map(to: v1,v2) map(from: p)
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call check (p, N)
+ end subroutine
+end module
+
+! Driver: run vec_mult on 1000 elements; it aborts on a mismatch.
+program e_50_3
+ use e_50_3_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+! Vector multiply with array sections in the map clauses, once on
+! pointer arrays (vec_mult_1) and once on assumed-size arrays
+! (vec_mult_2).
+module e_50_4_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real, pointer, dimension(:) :: v1, v2
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real, pointer, dimension(:) :: p
+ ! Declared explicitly rather than left to implicit typing.
+ real :: diff
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Pointer-array variant: maps the sections v1(1:N), v2(:N) and p(1:N).
+ subroutine vec_mult_1 (p, v1, v2, N)
+ integer :: i, N
+ real, pointer, dimension(:) :: p, v1, v2
+ !$omp target map(to: v1(1:N), v2(:N)) map(from: p(1:N))
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ end subroutine
+
+ ! Assumed-size variant: same sections, dummies declared dimension(*).
+ subroutine vec_mult_2 (p, v1, v2, N)
+ real, dimension(*) :: p, v1, v2
+ integer :: i, N
+ !$omp target map(to: v1(1:N), v2(:N)) map(from: p(1:N))
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ end subroutine
+end module
+
+! Driver: multiply the same inputs with both variants and check each
+! result vector.
+program e_50_4
+ use e_50_4_mod, only : init, check, vec_mult_1, vec_mult_2
+ real, pointer, dimension(:) :: p1, p2, v1, v2
+ integer :: n
+ n = 1000
+ allocate (p1(n), p2(n), v1(n), v2(n))
+ call init (v1, v2, n)
+ call vec_mult_1 (p1, v1, v2, n)
+ call vec_mult_2 (p2, v1, v2, n)
+ call check (p1, N)
+ call check (p2, N)
+ deallocate (p1, p2, v1, v2)
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+! Vector multiply with if clauses: the target region is conditional on
+! N > THRESHOLD1 and the parallel loop on N > THRESHOLD2.
+module e_50_5_mod
+integer, parameter :: THRESHOLD1 = 500, THRESHOLD2 = 100
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Multiply in a conditional target region and verify.  The region
+ ! aborts if it finds itself running on the initial device.
+ subroutine vec_mult (N)
+ use omp_lib, only: omp_is_initial_device
+ integer :: i, N
+ real :: p(N), v1(N), v2(N)
+ call init (v1, v2, N)
+ !$omp target if(N > THRESHOLD1) map(to: v1,v2) map(from: p)
+ if (omp_is_initial_device ()) call abort
+ !$omp parallel do if(N > THRESHOLD2)
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call check (p, N)
+ end subroutine
+end module
+
+! Driver: n = 1000 exceeds both thresholds declared in the module.
+program e_50_5
+ use e_50_5_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+! Vector multiply where the mappings are on an enclosing "target data"
+! region and the nested "target" has no map clauses of its own.
+module e_51_1_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Initialise local arrays, multiply inside the data region and verify.
+ subroutine vec_mult (N)
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ call init (v1, v2, N)
+ !$omp target data map(to: v1, v2) map(from: p)
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+! Driver: run vec_mult on 1000 elements; it aborts on a mismatch.
+program e_51_1
+ use e_51_1_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+! Two target regions inside one "target data" region: p is mapped once
+! by the data region, v1/v2 are mapped "to" by each target region.
+module e_51_2_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Same values as init with the roles of v1 and v2 exchanged.
+ subroutine init_again (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i - 3.0
+ v2(i) = i + 2.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of 2 * (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - 2 * (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! First region stores v1 * v2 into p; after the host re-initialises
+ ! v1/v2 the second region adds their product to p.
+ subroutine vec_mult (N)
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ call init (v1, v2, N)
+ !$omp target data map(from: p)
+ !$omp target map(to: v1, v2 )
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call init_again (v1, v2, N)
+ !$omp target map(to: v1, v2 )
+ !$omp parallel do
+ do i = 1, N
+ p(i) = p(i) + v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+! Driver: run vec_mult on 1000 elements; it aborts on a mismatch.
+program e_51_2
+ use e_51_2_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+! Column-wise scaling of a matrix, as a host reference and as a version
+! that keeps Q mapped in a "target data" region around the column loop.
+module e_51_3_mod
+contains
+ ! Fill Q(i,k) = 10 * i + k.
+ subroutine init (Q, rows, cols)
+ integer :: i, k, rows, cols
+ double precision :: Q(rows,cols)
+ do k = 1, cols
+ do i = 1, rows
+ Q(i,k) = 10 * i + k
+ end do
+ end do
+ end subroutine
+
+ ! Abort if any element of P differs from Q by more than EPS.
+ subroutine check (P, Q, rows, cols)
+ integer :: i, k, rows, cols
+ double precision, parameter :: EPS = 0.00001
+ double precision :: P(rows,cols), Q(rows,cols), diff
+ do k = 1, cols
+ do i = 1, rows
+ diff = P(i,k) - Q(i,k)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end do
+ end subroutine
+
+ ! Host reference: for each column k, sum the squares of its elements
+ ! and multiply the column by 1 / sqrt of that sum.
+ subroutine gramSchmidt_ref (Q, rows, cols)
+ integer :: i, k, rows, cols
+ double precision :: Q(rows,cols), tmp
+ do k = 1, cols
+ tmp = 0.0d0
+ do i = 1, rows
+ tmp = tmp + (Q(i,k) * Q(i,k))
+ end do
+ tmp = 1.0d0 / sqrt (tmp)
+ do i = 1, rows
+ Q(i,k) = Q(i,k) * tmp
+ end do
+ end do
+ end subroutine
+
+ ! Same computation with both inner loops in target regions.  The
+ ! reciprocal square root between them is computed outside any target
+ ! region.
+ subroutine gramSchmidt (Q, rows, cols)
+ integer :: i, k, rows, cols
+ double precision :: Q(rows,cols), tmp
+ !$omp target data map(Q)
+ do k = 1, cols
+ tmp = 0.0d0
+ !$omp target
+ !$omp parallel do reduction(+:tmp)
+ do i = 1, rows
+ tmp = tmp + (Q(i,k) * Q(i,k))
+ end do
+ !$omp end target
+ tmp = 1.0d0 / sqrt (tmp)
+ !$omp target
+ !$omp parallel do
+ do i = 1, rows
+ Q(i,k) = Q(i,k) * tmp
+ end do
+ !$omp end target
+ end do
+ !$omp end target data
+ end subroutine
+end module
+
+! Driver: run the reference on P and the offloaded version on Q, both
+! 5 x 5 and identically initialised, then compare them.
+program e_51_3
+ use e_51_3_mod, only : init, check, gramSchmidt, gramSchmidt_ref
+ integer :: cols, rows
+ double precision, pointer :: P(:,:), Q(:,:)
+ cols = 5
+ rows = 5
+ allocate (P(rows,cols), Q(rows,cols))
+ call init (P, rows, cols)
+ call init (Q, rows, cols)
+ call gramSchmidt_ref (P, rows, cols)
+ call gramSchmidt (Q, rows, cols)
+ call check (P, Q, rows, cols)
+ deallocate (P, Q)
+end program
--- /dev/null
+! { dg-do run }
+
+! "target data" in the caller (foo) around a callee (vec_mult) whose
+! target region names the same pointer arrays in its own map clauses.
+module e_51_4_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Initialise the inputs, call vec_mult inside a data region, verify.
+ subroutine foo (p, v1, v2, N)
+ real, pointer, dimension(:) :: p, v1, v2
+ integer :: N
+ call init (v1, v2, N)
+ !$omp target data map(to: v1, v2) map(from: p)
+ call vec_mult (p, v1, v2, N)
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+
+ ! Element-wise product in a target region.
+ subroutine vec_mult (p, v1, v2, N)
+ real, pointer, dimension(:) :: p, v1, v2
+ integer :: i, N
+ !$omp target map(to: v1, v2) map(from: p)
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ end subroutine
+end module
+
+! Driver: allocate three 1000-element pointer arrays and run foo.
+program e_51_4
+ use e_51_4_mod, only : foo
+ integer :: n
+ real, pointer, dimension(:) :: p, v1, v2
+ n = 1000
+ allocate (p(n), v1(n), v2(n))
+ call foo (p, v1, v2, n)
+ deallocate (p, v1, v2)
+end program
--- /dev/null
+! { dg-do run }
+
+! Same caller/callee arrangement as a data region around a target
+! region, here with assumed-shape dummies and N listed in the map
+! clauses as well.
+module e_51_5_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Initialise the inputs, call vec_mult inside a data region, verify.
+ subroutine foo (p, v1, v2, N)
+ real, dimension(:) :: p, v1, v2
+ integer :: N
+ call init (v1, v2, N)
+ !$omp target data map(to: v1, v2, N) map(from: p)
+ call vec_mult (p, v1, v2, N)
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+
+ ! Element-wise product in a target region.
+ subroutine vec_mult (p, v1, v2, N)
+ real, dimension(:) :: p, v1, v2
+ integer :: i, N
+ !$omp target map(to: v1, v2, N) map(from: p)
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ end subroutine
+end module
+
+! Driver: allocate three 1024-element allocatable arrays and run foo.
+program e_51_5
+ use e_51_5_mod, only : foo
+ integer, parameter :: N = 1024
+ real, allocatable, dimension(:) :: p, v1, v2
+ allocate(p(N), v1(N), v2(N))
+ call foo (p, v1, v2, N)
+ deallocate (p, v1, v2)
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+! Two target regions in a data region, all three constructs carrying
+! the same if clause (N > THRESHOLD).
+module e_51_6_mod
+integer, parameter :: THRESHOLD = 500
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Same values as init with the roles of v1 and v2 exchanged.
+ subroutine init_again (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i - 3.0
+ v2(i) = i + 2.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of 2 * (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - 2 * (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! First region stores v1 * v2 into p, second adds the product of the
+ ! re-initialised inputs.  Each region aborts if it runs on the
+ ! initial device.
+ subroutine vec_mult (N)
+ use omp_lib, only: omp_is_initial_device
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ call init (v1, v2, N)
+ !$omp target data if(N > THRESHOLD) map(from: p)
+ !$omp target if(N > THRESHOLD) map(to: v1, v2)
+ if (omp_is_initial_device ()) call abort
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call init_again (v1, v2, N)
+ !$omp target if(N > THRESHOLD) map(to: v1, v2)
+ if (omp_is_initial_device ()) call abort
+ !$omp parallel do
+ do i = 1, N
+ p(i) = p(i) + v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+! Driver: n = 1000 exceeds the module's THRESHOLD of 500.
+program e_51_6
+ use e_51_6_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+! Conditional "target data" region (if N > THRESHOLD) around an
+! unconditional target region.
+module e_51_7_mod
+integer, parameter :: THRESHOLD = 500
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Multiply inside the data region and verify.  The target region
+ ! aborts if it runs on the initial device.
+ subroutine vec_mult (N)
+ use omp_lib, only: omp_is_initial_device
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ call init (v1, v2, N)
+ !$omp target data if(N > THRESHOLD) map(to: v1, v2) map(from: p)
+ !$omp target
+ if (omp_is_initial_device ()) call abort
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+! Driver: n = 1000 exceeds the module's THRESHOLD of 500.
+program e_51_7
+ use e_51_7_mod, only : vec_mult
+ integer :: n
+ n = 1000
+ call vec_mult (n)
+end program
--- /dev/null
+! { dg-do run }
+
+! "target update to" between two target regions of one data region:
+! the host rewrites v1/v2 and pushes the new values to the device.
+module e_52_1_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Same values as init with the roles of v1 and v2 exchanged.
+ subroutine init_again (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i - 3.0
+ v2(i) = i + 2.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of 2 * (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - 2 * (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! First region stores v1 * v2 into p; after init_again and the update
+ ! directive the second region adds the new product to p.
+ subroutine vec_mult (p, v1, v2, N)
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ call init (v1, v2, N)
+ !$omp target data map(to: v1, v2) map(from: p)
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ call init_again (v1, v2, N)
+ !$omp target update to(v1, v2)
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = p(i) + v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+! Driver: allocate three 1000-element pointer arrays and run vec_mult.
+program e_52_1
+ use e_52_1_mod, only : vec_mult
+ integer :: n
+ real, pointer :: p(:), v1(:), v2(:)
+ n = 1000
+ allocate (p(n), v1(n), v2(n))
+ call vec_mult (p, v1, v2, n)
+ deallocate (p, v1, v2)
+end program
--- /dev/null
+! { dg-do run }
+
+! "target update" with an if clause: each input is pushed to the device
+! only when maybe_init_again reports that it changed.
+module e_52_2_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Same values as init with the roles of v1 and v2 exchanged.
+ ! No caller is visible in this module.
+ subroutine init_again (v1, v2, N)
+ integer :: i, N
+ real :: v1(N), v2(N)
+ do i = 1, N
+ v1(i) = i - 3.0
+ v2(i) = i + 2.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of
+ ! i * i + (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real :: diff, p(N)
+ do i = 1, N
+ diff = p(i) - (i * i + (i + 2.0) * (i - 3.0))
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Overwrite v(i) with i and always return .true.
+ logical function maybe_init_again (v, N)
+ real :: v(N)
+ integer :: i, N
+ do i = 1, N
+ v(i) = i
+ end do
+ maybe_init_again = .true.
+ end function
+
+ ! First region stores v1 * v2 into p; both inputs are then rewritten
+ ! and conditionally updated before the second region adds their
+ ! product to p.
+ subroutine vec_mult (p, v1, v2, N)
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ logical :: changed
+ call init (v1, v2, N)
+ !$omp target data map(to: v1, v2) map(from: p)
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ changed = maybe_init_again (v1, N)
+ !$omp target update if(changed) to(v1(:N))
+ changed = maybe_init_again (v2, N)
+ !$omp target update if(changed) to(v2(:N))
+ !$omp target
+ !$omp parallel do
+ do i = 1, N
+ p(i) = p(i) + v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp end target data
+ call check (p, N)
+ end subroutine
+end module
+
+! Driver: allocate three 1000-element pointer arrays and run vec_mult.
+program e_52_2
+ use e_52_2_mod, only : vec_mult
+ integer :: n
+ real, pointer :: p(:), v1(:), v2(:)
+ n = 1000
+ allocate (p(n), v1(n), v2(n))
+ call vec_mult (p, v1, v2, n)
+ deallocate (p, v1, v2)
+end program
--- /dev/null
+! { dg-do run }
+
+! A recursive "declare target" function called from a target region
+! whose if clause depends on the argument.
+module e_53_1_mod
+ integer :: THRESHOLD = 20
+contains
+ ! Recursive Fibonacci: 0 for n <= 0, 1 for n == 1.
+ integer recursive function fib (n) result (f)
+ !$omp declare target
+ integer :: n
+ if (n <= 0) then
+ f = 0
+ else if (n == 1) then
+ f = 1
+ else
+ f = fib (n - 1) + fib (n - 2)
+ end if
+ end function
+
+ ! Evaluate fib (n) in a target region guarded by if(n > THRESHOLD).
+ ! The dummy n has no explicit declaration and is implicitly typed.
+ integer function fib_wrapper (n)
+ integer :: x
+ !$omp target map(to: n) if(n > THRESHOLD)
+ x = fib (n)
+ !$omp end target
+ fib_wrapper = x
+ end function
+end module
+
+! Driver: compare direct and wrapped calls for one argument below
+! (15) and one above (25) the module's THRESHOLD of 20.
+program e_53_1
+ use e_53_1_mod, only : fib, fib_wrapper
+ if (fib (15) /= fib_wrapper (15)) call abort
+ if (fib (25) /= fib_wrapper (25)) call abort
+end program
--- /dev/null
+! { dg-do run }
+
+! Call the external function fib from a target region and compare the
+! result with a host call.  fib is named in a "declare target"
+! directive here as well as in its own definition.
+program e_53_2
+ !$omp declare target (fib)
+ integer :: x, fib
+ !$omp target
+ x = fib (25)
+ !$omp end target
+ if (x /= fib (25)) call abort
+end program
+
+! Recursive Fibonacci: 0 for n <= 0, 1 for n == 1.  Marked
+! "declare target" so it can be called from a target region.
+integer recursive function fib (n) result (f)
+ !$omp declare target
+ integer :: n
+ if (n <= 0) then
+ f = 0
+ else if (n == 1) then
+ f = 1
+ else
+ f = fib (n - 1) + fib (n - 2)
+ end if
+end function
--- /dev/null
+! { dg-do run }
+
+! Module data named in a "declare target" directive: the size N and
+! the three N-element arrays used by vec_mult.
+module e_53_3_mod
+ !$omp declare target (N, p, v1, v2)
+ integer, parameter :: N = 1000
+ real :: p(N), v1(N), v2(N)
+end module
+
+! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+subroutine init (v1, v2, N)
+  integer :: i, N
+  real :: v1(N), v2(N)
+  i = 1
+  do while (i <= N)
+    v1(i) = i + 2.0
+    v2(i) = i - 3.0
+    i = i + 1
+  end do
+end subroutine
+
+! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+subroutine check (p, N)
+  integer :: i, N
+  real, parameter :: EPS = 0.00001
+  real :: diff, p(N)
+  i = 1
+  do while (i <= N)
+    diff = p(i) - (i + 2.0) * (i - 3.0)
+    if (abs (diff) > EPS) call abort
+    i = i + 1
+  end do
+end subroutine
+
+! Multiply the module arrays in a target region.  The inputs are pushed
+! with "target update to" beforehand and the result is fetched with
+! "target update from" afterwards; no map clauses are used.
+subroutine vec_mult ()
+ use e_53_3_mod
+ integer :: i
+ call init (v1, v2, N);
+ !$omp target update to(v1, v2)
+ !$omp target
+ !$omp parallel do
+ do i = 1,N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target
+ !$omp target update from (p)
+ call check (p, N)
+end subroutine
+
+! Driver: vec_mult aborts on a mismatch.
+program e_53_3
+ call vec_mult ()
+end program
--- /dev/null
+! { dg-do run }
+
+! "declare target" matrix Q together with a "declare target" function
+! that reads it.
+module e_53_4_mod
+ !$omp declare target (N, Q)
+ integer, parameter :: N = 10
+ real :: Q(N,N)
+contains
+ ! Product of Q(i,k) and its transposed counterpart Q(k,i).
+ real function Pfun (i, k)
+ !$omp declare target
+ integer, intent(in) :: i, k
+ Pfun = (Q(i,k) * Q(k,i))
+ end function
+end module
+
+! Sum Pfun (k, i) over i = 1..N inside a target region, using a
+! parallel loop with a "+" reduction on the result.
+real function accum (k) result (tmp)
+ use e_53_4_mod
+ integer :: i, k
+ tmp = 0.0e0
+ !$omp target
+ !$omp parallel do reduction(+:tmp)
+ do i = 1, N
+ tmp = tmp + Pfun (k, i)
+ end do
+ !$omp end target
+end function
+
+! Host reference for accum: sum Pfun (k, i) over i = 1..N in order.
+real function accum_ref (k) result (tmp)
+  use e_53_4_mod
+  integer :: i, k
+  tmp = 0.0e0
+  i = 1
+  do while (i <= N)
+    tmp = tmp + Pfun (k, i)
+    i = i + 1
+  end do
+end function
+
+! Fill the module matrix with Q(i,j) = 0.001 * i * j.
+subroutine init ()
+  use e_53_4_mod
+  integer :: i, j
+  i = 1
+  do while (i <= N)
+    j = 1
+    do while (j <= N)
+      Q(i,j) = 0.001 * i * j
+      j = j + 1
+    end do
+    i = i + 1
+  end do
+end subroutine
+
+! Abort if a and b differ by more than EPS.  The error is a when b is
+! zero, b when only a is zero, and (a - b) / b otherwise.
+subroutine check (a, b)
+  real, parameter :: EPS = 0.00001
+  real :: a, b, err
+  if (b == 0.0) then
+    err = a
+  else
+    if (a == 0.0) then
+      err = b
+    else
+      err = (a - b) / b
+    end if
+  end if
+  if (abs (err) > EPS) call abort
+end subroutine
+
+! Driver: initialise Q on the host, push it with "target update to",
+! then compare the offloaded and reference sums for every index.
+program e_53_4
+ use e_53_4_mod
+ integer :: i
+ real :: accum, accum_ref
+ call init ()
+ !$omp target update to(Q)
+ do i = 1, N
+ call check (accum (i), accum_ref (i))
+ end do
+end program
--- /dev/null
+! { dg-do run }
+! { dg-options "-O2" }
+! { dg-additional-options "-msse2" { target sse2_runtime } }
+! { dg-additional-options "-mavx" { target avx_runtime } }
+
+! "declare target" matrix Q and a function that is both
+! "declare simd" and "declare target".
+module e_53_5_mod
+ !$omp declare target (N, Q)
+ integer, parameter :: N = 10000, M = 1024
+ real :: Q(N,N)
+contains
+ ! Product of Q(k,i) and Q(i,k).  The simd directive declares i
+ ! uniform and k linear.
+ real function Pfun (k, i)
+ !$omp declare simd(Pfun) uniform(i) linear(k) notinbranch
+ !$omp declare target
+ integer, value, intent(in) :: i, k
+ Pfun = (Q(k,i) * Q(i,k))
+ end function
+end module
+
+! Offloaded double sum of Pfun (k, i) for i = 1..N and k = 1..M.  The
+! outer loop is a parallel do reducing into tmp; the inner loop is a
+! simd loop reducing into the private partial sum tmp1.  The inner
+! index k has no explicit declaration and is implicitly typed.
+real function accum () result (tmp)
+ use e_53_5_mod
+ real :: tmp1
+ integer :: i
+ tmp = 0.0e0
+ !$omp target
+ !$omp parallel do private(tmp1) reduction(+:tmp)
+ do i = 1, N
+ tmp1 = 0.0e0
+ !$omp simd reduction(+:tmp1)
+ do k = 1, M
+ tmp1 = tmp1 + Pfun (k, i)
+ end do
+ tmp = tmp + tmp1
+ end do
+ !$omp end target
+end function
+
+! Host reference for accum: for each i = 1..N add up Pfun (k, i) over
+! k = 1..M into a partial sum, then add that partial sum to the result.
+! The inner index k is implicitly typed, as in the original.
+real function accum_ref () result (tmp)
+  use e_53_5_mod
+  real :: tmp1
+  integer :: i
+  tmp = 0.0e0
+  i = 1
+  do while (i <= N)
+    tmp1 = 0.0e0
+    k = 1
+    do while (k <= M)
+      tmp1 = tmp1 + Pfun (k, i)
+      k = k + 1
+    end do
+    tmp = tmp + tmp1
+    i = i + 1
+  end do
+end function
+
+! Fill the module matrix with Q(i,j) = 0.001 * i * j.
+subroutine init ()
+  use e_53_5_mod
+  integer :: i, j
+  i = 1
+  do while (i <= N)
+    j = 1
+    do while (j <= N)
+      Q(i,j) = 0.001 * i * j
+      j = j + 1
+    end do
+    i = i + 1
+  end do
+end subroutine
+
+! Abort if a and b differ by more than EPS.  The error is a when b is
+! zero, b when only a is zero, and (a - b) / b otherwise.
+subroutine check (a, b)
+  real, parameter :: EPS = 0.00001
+  real :: a, b, err
+  if (b == 0.0) then
+    err = a
+  else
+    if (a == 0.0) then
+      err = b
+    else
+      err = (a - b) / b
+    end if
+  end if
+  if (abs (err) > EPS) call abort
+end subroutine
+
+! Driver: initialise Q on the host, push it with "target update to",
+! then compare the offloaded sum with the reference sum.  The local d
+! is declared but not used.
+program e_53_5
+ use e_53_5_mod
+ real :: accum, accum_ref, d
+ call init ()
+ !$omp target update to(Q)
+ call check (accum (), accum_ref ())
+end program
--- /dev/null
+! { dg-do run }
+
+! Host reference: dot product of B and C, accumulated in index order.
+function dotprod_ref (B, C, N) result (sum)
+  implicit none
+  real :: B(N), C(N), sum
+  integer :: N, i
+  sum = 0.0e0
+  i = 1
+  do while (i <= N)
+    sum = sum + B(i) * C(i)
+    i = i + 1
+  end do
+end function
+
+! Offloaded dot product using teams.  The index range is cut into
+! blocks of block_size that are spread over the teams with
+! "distribute"; each block is summed by a parallel do.  Both the teams
+! construct and the parallel do reduce into sum.  num_teams and
+! block_threads feed the num_teams and thread_limit clauses.
+function dotprod (B, C, N, block_size, num_teams, block_threads) result (sum)
+ implicit none
+ real :: B(N), C(N), sum
+ integer :: N, block_size, num_teams, block_threads, i, i0
+ sum = 0.0e0
+ !$omp target map(to: B, C, block_size, num_teams, block_threads)
+ !$omp teams num_teams(num_teams) thread_limit(block_threads) &
+ !$omp& reduction(+:sum)
+ !$omp distribute
+ do i0 = 1, N, block_size
+ !$omp parallel do reduction(+:sum)
+ do i = i0, min (i0 + block_size - 1, N)
+ sum = sum + B(i) * C(i)
+ end do
+ end do
+ !$omp end teams
+ !$omp end target
+end function
+
+! Fill B(i) = 0.0001 * i and C(i) = 0.000001 * i * i for i = 1..N.
+subroutine init (B, C, N)
+  real :: B(N), C(N)
+  integer :: N, i
+  i = 1
+  do while (i <= N)
+    B(i) = 0.0001 * i
+    C(i) = 0.000001 * i * i
+    i = i + 1
+  end do
+end subroutine
+
+! Abort if a and b differ by more than EPS.  The error is a when b is
+! zero, b when only a is zero, and (a - b) / b otherwise.
+subroutine check (a, b)
+  real, parameter :: EPS = 0.0001
+  real :: a, b, err
+  if (b == 0.0) then
+    err = a
+  else
+    if (a == 0.0) then
+      err = b
+    else
+      err = (a - b) / b
+    end if
+  end if
+  if (abs (err) > EPS) call abort
+end subroutine
+
+! Driver: compare the reference dot product of two 1024 * 1024 element
+! vectors with the teams version run with block size 32, 2 teams and a
+! thread limit of 8.  Neither function is declared in this program
+! unit.
+program e_54_1
+ integer :: n
+ real :: ref, d
+ real, pointer, dimension(:) :: B, C
+ n = 1024 * 1024
+ allocate (B(n), C(n))
+ call init (B, C, n)
+ ref = dotprod_ref (B, C, n)
+ d = dotprod (B, C, n, 32, 2, 8)
+ call check (ref, d)
+ deallocate (B, C)
+end program
--- /dev/null
+! { dg-do run }
+
+! Host reference: dot product of B and C, accumulated in index order.
+function dotprod_ref (B, C, N) result (sum)
+  implicit none
+  real :: B(N), C(N), sum
+  integer :: N, i
+  sum = 0.0e0
+  i = 1
+  do while (i <= N)
+    sum = sum + B(i) * C(i)
+    i = i + 1
+  end do
+end function
+
+! Offloaded dot product: a combined "target teams" region mapping B and
+! C to the device, with the loop under "distribute parallel do" and a
+! "+" reduction on sum.
+function dotprod (B, C, N) result(sum)
+ real :: B(N), C(N), sum
+ integer :: N, i
+ sum = 0.0e0
+ !$omp target teams map(to: B, C)
+ !$omp distribute parallel do reduction(+:sum)
+ do i = 1, N
+ sum = sum + B(i) * C(i)
+ end do
+ !$omp end target teams
+end function
+
+! Fill B(i) = 0.0001 * i and C(i) = 0.000001 * i * i for i = 1..N.
+subroutine init (B, C, N)
+  real :: B(N), C(N)
+  integer :: N, i
+  i = 1
+  do while (i <= N)
+    B(i) = 0.0001 * i
+    C(i) = 0.000001 * i * i
+    i = i + 1
+  end do
+end subroutine
+
+! Abort if a and b differ by more than EPS.  The error is a when b is
+! zero, b when only a is zero, and (a - b) / b otherwise.
+subroutine check (a, b)
+  real, parameter :: EPS = 0.0001
+  real :: a, b, err
+  if (b == 0.0) then
+    err = a
+  else
+    if (a == 0.0) then
+      err = b
+    else
+      err = (a - b) / b
+    end if
+  end if
+  if (abs (err) > EPS) call abort
+end subroutine
+
+! Driver: compare the reference and offloaded dot products of two
+! 1024 * 1024 element vectors.  Neither function is declared in this
+! program unit.
+program e_54_3
+ integer :: n
+ real :: ref, d
+ real, pointer, dimension(:) :: B, C
+ n = 1024 * 1024
+ allocate (B(n), C(n))
+ call init (B, C, n)
+ ref = dotprod_ref (B, C, n)
+ d = dotprod (B, C, n)
+ call check (ref, d)
+ deallocate (B, C)
+end program
--- /dev/null
+! { dg-do run }
+
+! Host reference: dot product of B and C, accumulated in index order.
+function dotprod_ref (B, C, N) result (sum)
+  implicit none
+  real :: B(N), C(N), sum
+  integer :: N, i
+  sum = 0.0e0
+  i = 1
+  do while (i <= N)
+    sum = sum + B(i) * C(i)
+    i = i + 1
+  end do
+end function
+
+! Offloaded dot product with explicit scheduling: 8 teams with a thread
+! limit of 16, dist_schedule(static, 1024) across the teams and
+! schedule(static, 64) within each team, reducing into sum.
+function dotprod (B, C, n) result(sum)
+ real :: B(N), C(N), sum
+ integer :: N, i
+ sum = 0.0e0
+ !$omp target map(to: B, C)
+ !$omp teams num_teams(8) thread_limit(16)
+ !$omp distribute parallel do reduction(+:sum) &
+ !$omp& dist_schedule(static, 1024) schedule(static, 64)
+ do i = 1, N
+ sum = sum + B(i) * C(i)
+ end do
+ !$omp end teams
+ !$omp end target
+end function
+
+! Fill B(i) = 0.0001 * i and C(i) = 0.000001 * i * i for i = 1..N.
+subroutine init (B, C, N)
+  real :: B(N), C(N)
+  integer :: N, i
+  i = 1
+  do while (i <= N)
+    B(i) = 0.0001 * i
+    C(i) = 0.000001 * i * i
+    i = i + 1
+  end do
+end subroutine
+
+! Abort if a and b differ by more than EPS.  The error is a when b is
+! zero, b when only a is zero, and (a - b) / b otherwise.
+subroutine check (a, b)
+  real, parameter :: EPS = 0.0001
+  real :: a, b, err
+  if (b == 0.0) then
+    err = a
+  else
+    if (a == 0.0) then
+      err = b
+    else
+      err = (a - b) / b
+    end if
+  end if
+  if (abs (err) > EPS) call abort
+end subroutine
+
+! Driver: compare the reference and offloaded dot products of two
+! 1024 * 1024 element vectors.  Neither function is declared in this
+! program unit.
+program e_54_4
+ integer :: n
+ real :: ref, d
+ real, pointer, dimension(:) :: B, C
+ n = 1024 * 1024
+ allocate (B(n), C(n))
+ call init (B, C, n)
+ ref = dotprod_ref (B, C, n)
+ d = dotprod (B, C, n)
+ call check (ref, d)
+ deallocate (B, C)
+end program
--- /dev/null
+! { dg-do run }
+
+! Vector multiply in a "target teams" region with the loop under
+! "distribute simd".
+module e_54_5_mod
+contains
+ ! Fill v1(i) = i + 2.0 and v2(i) = i - 3.0 for i = 1..N.
+ subroutine init (v1, v2, N)
+ integer :: i, N
+ real, pointer, dimension(:) :: v1, v2
+ do i = 1, N
+ v1(i) = i + 2.0
+ v2(i) = i - 3.0
+ end do
+ end subroutine
+
+ ! Abort unless every p(i) is within EPS of (i + 2.0) * (i - 3.0).
+ subroutine check (p, N)
+ integer :: i, N
+ real, parameter :: EPS = 0.00001
+ real, pointer, dimension(:) :: p
+ real :: diff
+ do i = 1, N
+ diff = p(i) - (i + 2.0) * (i - 3.0)
+ if (diff > EPS .or. -diff > EPS) call abort
+ end do
+ end subroutine
+
+ ! Element-wise product; the inputs are mapped "to", p is mapped "from".
+ subroutine vec_mult (p, v1, v2, N)
+ real :: p(N), v1(N), v2(N)
+ integer :: i, N
+ !$omp target teams map(to: v1, v2) map(from: p)
+ !$omp distribute simd
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ !$omp end target teams
+ end subroutine
+end module
+
+program e_54_5
+  ! Driver: initialises two 1000-element vectors, multiplies them with
+  ! vec_mult and verifies the product with check.
+  use e_54_5_mod, only : init, check, vec_mult
+  integer :: n
+  real, pointer, dimension(:) :: p, v1, v2
+
+  n = 1000
+  allocate (p(n))
+  allocate (v1(n))
+  allocate (v2(n))
+
+  call init (v1, v2, n)
+  call vec_mult (p, v1, v2, n)
+  call check (p, n)
+
+  deallocate (p)
+  deallocate (v1)
+  deallocate (v2)
+end program e_54_5
--- /dev/null
+! { dg-do run }
+
+module e_54_6_mod
+  ! Helpers for the e_54_6 test: fill the inputs, multiply them element by
+  ! element inside a TARGET TEAMS / DISTRIBUTE PARALLEL DO SIMD region, and
+  ! verify.
+contains
+  ! Sets v1(k) = k + 2.0 and v2(k) = k - 3.0 for k = 1..N.
+  subroutine init (v1, v2, N)
+    real, pointer, dimension(:) :: v1, v2
+    integer :: N, k
+    do k = 1, N
+      v1(k) = real (k) + 2.0
+      v2(k) = real (k) - 3.0
+    end do
+  end subroutine init
+
+  ! Aborts unless every p(k), k = 1..N, is within EPS of
+  ! (k + 2.0) * (k - 3.0).
+  subroutine check (p, N)
+    real, parameter :: EPS = 0.00001
+    real, pointer, dimension(:) :: p
+    integer :: N, k
+    real :: diff
+    do k = 1, N
+      diff = p(k) - (real (k) + 2.0) * (real (k) - 3.0)
+      if (abs (diff) > EPS) call abort
+    end do
+  end subroutine check
+
+  ! Stores v1(k) * v2(k) in p(k) for k = 1..N.  v1 and v2 are mapped TO
+  ! the device and p is mapped FROM it.
+  subroutine vec_mult (p, v1, v2, N)
+    integer :: N, k
+    real :: p(N), v1(N), v2(N)
+    !$omp target teams map(to: v1, v2) map(from: p)
+      !$omp distribute parallel do simd
+      do k = 1, N
+        p(k) = v1(k) * v2(k)
+      end do
+    !$omp end target teams
+  end subroutine vec_mult
+end module e_54_6_mod
+
+program e_54_6
+  ! Driver: initialises two 1000-element vectors, multiplies them with
+  ! vec_mult and verifies the product with check.
+  use e_54_6_mod, only : init, check, vec_mult
+  integer :: n
+  real, pointer, dimension(:) :: p, v1, v2
+
+  n = 1000
+  allocate (p(n))
+  allocate (v1(n))
+  allocate (v2(n))
+
+  call init (v1, v2, n)
+  call vec_mult (p, v1, v2, n)
+  call check (p, n)
+
+  deallocate (p)
+  deallocate (v1)
+  deallocate (v2)
+end program e_54_6
--- /dev/null
+! { dg-do run }
+
+module e_55_1_mod
+  ! Shared data for the e_55_1 test: two N-element real arrays and the
+  ! chunk length used when Z is processed piecewise.
+  integer, parameter :: N = 10000000
+  integer, parameter :: CHUNKSZ = 100000
+  real, dimension(N) :: Y, Z
+end module e_55_1_mod
+
+subroutine init ()
+  ! Gives Y and Z the same starting contents: element k holds 0.1 * k.
+  use e_55_1_mod, only : Y, Z, N
+  integer :: k
+  do k = 1, N
+    Y(k) = 0.1 * real (k)
+    Z(k) = Y(k)
+  end do
+end subroutine init
+
+subroutine check ()
+  ! Compares Y and Z element by element and aborts on the first pair whose
+  ! error measure exceeds EPS in magnitude.  The measure is the relative
+  ! difference (Y - Z) / Z when both values are nonzero; when one of them
+  ! is zero it is the other value itself.
+  use e_55_1_mod, only : Y, Z, N
+  real, parameter :: EPS = 0.00001
+  integer :: k
+  real :: err
+  do k = 1, N
+    if (Y(k) /= 0.0 .and. Z(k) /= 0.0) then
+      err = (Y(k) - Z(k)) / Z(k)
+    else if (Y(k) == 0.0) then
+      err = Z(k)
+    else
+      err = Y(k)
+    end if
+    if (abs (err) > EPS) call abort
+  end do
+end subroutine check
+
+function F (z)
+  ! Returns the negation of z.  Marked DECLARE TARGET so that it can be
+  ! called from inside a TARGET region.
+  !$omp declare target
+  real :: F
+  real, intent(in) :: z
+  F = -z
+end function F
+
+subroutine pipedF () ! Replaces each Z(i) by F (Z(i)), one CHUNKSZ-long section per task/target region
+ use e_55_1_mod, only: Z, N, CHUNKSZ
+ integer :: C, i
+ real :: F ! result type of the external function F, which is called without an explicit interface
+ do C = 1, N, CHUNKSZ ! C is the first index of the current chunk
+ !$omp task
+ !$omp target map(Z(C:C+CHUNKSZ-1))
+ !$omp parallel do
+ do i = C, C+CHUNKSZ-1 ! stays within Z: N is a multiple of CHUNKSZ in e_55_1_mod
+ Z(i) = F (Z(i))
+ end do
+ !$omp end target
+ !$omp end task
+ end do
+end subroutine
+
+subroutine pipedF_ref ()
+  ! Sequential counterpart of pipedF: replaces every Y(k) by F (Y(k)) in
+  ! a plain loop, without any OpenMP construct.
+  use e_55_1_mod, only: Y, N
+  real, external :: F
+  integer :: k
+  do k = 1, N
+    Y(k) = F (Y(k))
+  end do
+end subroutine pipedF_ref
+
+program e_55_1
+  ! Driver: fill Y and Z identically, transform Z with pipedF and Y with
+  ! pipedF_ref, then let check compare the two arrays.
+  call init ()
+
+  call pipedF ()
+  call pipedF_ref ()
+
+  call check ()
+end program e_55_1
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+subroutine init (v1, v2, N)
+  ! Sets v1(k) = k + 2.0 and v2(k) = k - 3.0 for k = 1..N.  Marked
+  ! DECLARE TARGET so that it can be called from inside a TARGET region.
+  !$omp declare target
+  integer :: N, k
+  real :: v1(N), v2(N)
+  do k = 1, N
+    v1(k) = real (k) + 2.0
+    v2(k) = real (k) - 3.0
+  end do
+end subroutine init
+
+subroutine check (p, N)
+  ! Aborts unless every p(k), k = 1..N, is within EPS of
+  ! (k + 2.0) * (k - 3.0).
+  real, parameter :: EPS = 0.00001
+  integer :: N, k
+  real :: p(N), diff
+  do k = 1, N
+    diff = p(k) - (real (k) + 2.0) * (real (k) - 3.0)
+    if (abs (diff) > EPS) call abort
+  end do
+end subroutine check
+
+subroutine vec_mult (p, N) ! Fills p(1:N) with v1 * v2 using two tasks that each run a target region, then verifies p
+ use omp_lib, only: omp_is_initial_device
+ real :: p(N) ! N has no explicit declaration in this routine; it is typed by the implicit rules
+ real, allocatable :: v1(:), v2(:)
+ integer :: i
+ !$omp declare target (init)
+ !$omp target data map(to: v1, v2, N) map(from: p)
+ !$omp task shared(v1, v2, p) depend(out: v1, v2)
+ !$omp target map(to: v1, v2, N)
+ if (omp_is_initial_device ()) call abort ! fail if this region ran on the initial device
+ allocate (v1(N), v2(N)) ! allocation happens inside the target region of the depend(out) task
+ call init (v1, v2, N) ! init is the routine named in the declare target directive above
+ !$omp end target
+ !$omp end task
+ !$omp task shared(v1, v2, p) depend(in: v1, v2)
+ !$omp target map(to: v1, v2, N) map(from: p)
+ if (omp_is_initial_device ()) call abort ! fail if this region ran on the initial device
+ !$omp parallel do
+ do i = 1, N
+ p(i) = v1(i) * v2(i)
+ end do
+ deallocate (v1, v2) ! frees, inside the second target region, what the first one allocated
+ !$omp end target
+ !$omp end task
+ !$omp end target data
+ call check (p, N) ! check aborts if any p(i) differs from (i + 2.0) * (i - 3.0) by more than its EPS
+end subroutine
+
+program e_55_2
+  ! Driver: runs vec_mult on a 1000-element result array.  vec_mult
+  ! performs the verification itself.
+  integer, parameter :: N = 1000
+  real, dimension(N) :: p
+
+  call vec_mult (p, N)
+end program e_55_2
--- /dev/null
+! { dg-do run }
+
+ call foo () ! the main program only invokes the internal subroutine below
+contains
+ subroutine foo () ! Maps one section of A by name and another through pointer p, writes both in a target region, checks A
+ integer, target :: A(30)
+ integer, pointer :: p(:)
+ !$omp target data map(A(1:4))
+ p => A ! p now aliases all of A, so p(9) below is the same element as A(9)
+ !$omp target map(p(8:27)) map(A(1:4))
+ A(3) = 777 ! element inside the mapped section A(1:4)
+ p(9) = 777 ! element inside the mapped section p(8:27)
+ !$omp end target
+ !$omp end target data
+ if (A(3) /= 777 .or. A(9) /= 777) call abort ! the test expects both writes to be visible through A here
+ end subroutine
+end
--- /dev/null
+! { dg-do run }
+
+ call foo () ! the main program only invokes the internal subroutine below
+contains
+ subroutine foo () ! Maps A(1:10) by name and the overlapping p(4:10) through a pointer, writes in a target region, checks A
+ integer, target :: A(30)
+ integer, pointer :: p(:)
+ !$omp target data map(A(1:10))
+ p => A ! p now aliases all of A, so p(9) and A(9) are the same element
+ !$omp target map(p(4:10)) map(A(1:10))
+ A(3) = 777 ! element inside A(1:10) only
+ p(9) = 777 ! element inside both mapped sections
+ A(9) = 999 ! later write to that same element through the other name
+ !$omp end target
+ !$omp end target data
+ if (A(3) /= 777 .or. A(9) /= 999) call abort ! the test expects the last write to A(9) to be the one seen here
+ end subroutine
+end
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+program e_57_1
+  ! Exercises the IF clause on TARGET and TARGET DATA.  The condition
+  ! "a > 200 .and. a < 400" is evaluated for a = 100, 300 and 500, so it
+  ! holds only in the second stage.
+  !
+  ! c, d and b appear in explicit MAP clauses: OpenMP 4.5 and later treat
+  ! an unmapped scalar referenced in a TARGET construct as firstprivate,
+  ! so without the clauses the values assigned inside the regions would
+  ! never reach the host and the checks below would read stale data.
+  use omp_lib, only: omp_is_initial_device
+  integer :: a, b
+  logical :: c, d
+
+  ! Stage 1: a = 100, condition false.
+  a = 100
+  b = 0
+
+  !$omp target map(from: c) if(a > 200 .and. a < 400)
+    c = omp_is_initial_device ()
+  !$omp end target
+
+  !$omp target data map(to: b) if(a > 200 .and. a < 400)
+    !$omp target map(from: b, d)
+      b = 100
+      d = omp_is_initial_device ()
+    !$omp end target
+  !$omp end target data
+
+  ! Expected: the first region ran on the initial device (c true); the
+  ! inner region has no IF clause and is expected to be offloaded
+  ! (d false); its own map brings b = 100 back.
+  if (b /= 100 .or. .not. c .or. d) call abort
+
+  ! Stage 2: a = 300, condition true.
+  a = a + 200
+  b = 0
+
+  !$omp target map(from: c) if(a > 200 .and. a < 400)
+    c = omp_is_initial_device ()
+  !$omp end target
+
+  !$omp target data map(to: b) if(a > 200 .and. a < 400)
+    !$omp target map(from: b, d)
+      b = 100
+      d = omp_is_initial_device ()
+    !$omp end target
+  !$omp end target data
+
+  ! Expected: both regions offloaded (c and d false).  b is already
+  ! present through the enclosing map(to: b), which never copies back, so
+  ! the host value stays 0.
+  if (b /= 0 .or. c .or. d) call abort
+
+  ! Stage 3: a = 500, condition false again; same expectations as stage 1.
+  a = a + 200
+  b = 0
+
+  !$omp target map(from: c) if(a > 200 .and. a < 400)
+    c = omp_is_initial_device ()
+  !$omp end target
+
+  !$omp target data map(to: b) if(a > 200 .and. a < 400)
+    !$omp target map(from: b, d)
+      b = 100
+      d = omp_is_initial_device ()
+    !$omp end target
+  !$omp end target data
+
+  if (b /= 100 .or. .not. c .or. d) call abort
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+program e_57_2
+  ! Launches one TARGET region per device number 0..N-1 and records in
+  ! offload(i) whether region i ran on the initial device.  Regions whose
+  ! device number is below omp_get_num_devices () are expected to be
+  ! offloaded; the remaining ones are expected to run on the initial
+  ! device.
+  use omp_lib, only: omp_is_initial_device, omp_get_num_devices
+  integer, parameter :: N = 10
+  integer :: i, num
+  logical :: offload(N)
+  num = omp_get_num_devices ()
+  !$omp parallel do
+  do i = 1, N
+    !$omp target device(i-1) map(from: offload(i:i))
+      offload(i) = omp_is_initial_device ()
+    !$omp end target
+  end do
+
+  ! Clamp to N: on a machine with more than N devices this loop must not
+  ! index past the end of offload.
+  do i = 1, min (num, N)
+    if (offload(i)) call abort
+  end do
+
+  do i = num+1, N
+    if (.not. offload(i)) call abort
+  end do
+end program
--- /dev/null
+! { dg-do run }
+! { dg-require-effective-target offload_device }
+
+program e_57_3
+  ! Runs a TARGET region with the default device setting and expects it
+  ! not to run on the initial device; then sets the default device number
+  ! to omp_get_num_devices () and expects the next region to run on the
+  ! initial device.
+  !
+  ! res is mapped FROM explicitly: OpenMP 4.5 and later treat an unmapped
+  ! scalar referenced in a TARGET construct as firstprivate, so without
+  ! the clause the value assigned in the region would not reach the host.
+  use omp_lib, only: omp_is_initial_device, omp_get_num_devices,&
+    omp_get_default_device, omp_set_default_device
+  logical :: res
+  integer :: default_device
+
+  default_device = omp_get_default_device ()
+  !$omp target map(from: res)
+    res = omp_is_initial_device ()
+  !$omp end target
+  if (res) call abort
+
+  call omp_set_default_device (omp_get_num_devices ())
+  !$omp target map(from: res)
+    res = omp_is_initial_device ()
+  !$omp end target
+  if (.not. res) call abort
+end program