* libgomp.oacc-c-c++-common/reduction-dbl.c: New.
* libgomp.oacc-c-c++-common/reduction-flt.c: New.
* libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Use typedef.
* libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Use typedef.
* libgomp.oacc-c-c++-common/reduction-2.c: Uncomment broken tests
and fix.
* libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
* libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
From-SVN: r230621
+2015-11-19 Nathan Sidwell <nathan@codesourcery.com>
+
+ * libgomp.oacc-c-c++-common/reduction-dbl.c: New.
+ * libgomp.oacc-c-c++-common/reduction-flt.c: New.
+ * libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Use typedef.
+ * libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Use typedef.
+ * libgomp.oacc-c-c++-common/reduction-2.c: Uncomment broken tests
+ and fix.
+ * libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
+ * libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
+
2015-11-18 Nathan Sidwell <nathan@codesourcery.com>
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Add
if (fabs(result - vresult) > .0001)
abort ();
-// result = 0;
-// vresult = 0;
-//
-// /* 'max' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result > array[i] ? result : array[i];
-//
-// /* Verify the reduction. */
-// for (i = 0; i < n; i++)
-// vresult = vresult > array[i] ? vresult : array[i];
-//
-// printf("%d != %d\n", result, vresult);
-// if (result != vresult)
-// abort ();
-//
-// result = 0;
-// vresult = 0;
-//
-// /* 'min' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result < array[i] ? result : array[i];
-//
-// /* Verify the reduction. */
-// for (i = 0; i < n; i++)
-// vresult = vresult < array[i] ? vresult : array[i];
-//
-// printf("%d != %d\n", result, vresult);
-// if (result != vresult)
-// abort ();
+ result = 0;
+ vresult = 0;
+
+ /* 'max' reductions. */
+#pragma acc parallel vector_length (vl) copy(result)
+#pragma acc loop reduction (max:result)
+ for (i = 0; i < n; i++)
+ result = result > array[i] ? result : array[i];
+
+ /* Verify the reduction. */
+ for (i = 0; i < n; i++)
+ vresult = vresult > array[i] ? vresult : array[i];
+
+ if (result != vresult)
+ abort ();
+
+ result = 0;
+ vresult = 0;
+
+ /* 'min' reductions. */
+#pragma acc parallel vector_length (vl) copy(result)
+#pragma acc loop reduction (min:result)
+ for (i = 0; i < n; i++)
+ result = result < array[i] ? result : array[i];
+
+ /* Verify the reduction. */
+ for (i = 0; i < n; i++)
+ vresult = vresult < array[i] ? vresult : array[i];
+
+ if (result != vresult)
+ abort ();
result = 5;
vresult = 5;
result = 0;
vresult = 0;
- /* '+' reductions. */
+ /* 'max' reductions. */
#pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (+:result)
+#pragma acc loop reduction (max:result)
for (i = 0; i < n; i++)
- result += array[i];
+ result = result > array[i] ? result : array[i];
/* Verify the reduction. */
for (i = 0; i < n; i++)
- vresult += array[i];
+ vresult = vresult > array[i] ? vresult : array[i];
if (result != vresult)
abort ();
result = 0;
vresult = 0;
- /* '*' reductions. */
+ /* 'min' reductions. */
#pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (*:result)
+#pragma acc loop reduction (min:result)
for (i = 0; i < n; i++)
- result *= array[i];
+ result = result < array[i] ? result : array[i];
/* Verify the reduction. */
for (i = 0; i < n; i++)
- vresult *= array[i];
+ vresult = vresult < array[i] ? vresult : array[i];
- if (fabs(result - vresult) > .0001)
+ if (result != vresult)
abort ();
-// result = 0;
-// vresult = 0;
-//
-// /* 'max' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result > array[i] ? result : array[i];
-//
-// /* Verify the reduction. */
-// for (i = 0; i < n; i++)
-// vresult = vresult > array[i] ? vresult : array[i];
-//
-// printf("%d != %d\n", result, vresult);
-// if (result != vresult)
-// abort ();
-//
-// result = 0;
-// vresult = 0;
-//
-// /* 'min' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result < array[i] ? result : array[i];
-//
-// /* Verify the reduction. */
-// for (i = 0; i < n; i++)
-// vresult = vresult < array[i] ? vresult : array[i];
-//
-// printf("%d != %d\n", result, vresult);
-// if (result != vresult)
-// abort ();
result = 5;
vresult = 5;
result = 0;
vresult = 0;
- /* '+' reductions. */
-#pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (+:result)
- for (i = 0; i < n; i++)
- result += array[i];
-
- /* Verify the reduction. */
- for (i = 0; i < n; i++)
- vresult += array[i];
-
- if (result != vresult)
- abort ();
-
- result = 0;
- vresult = 0;
-
- /* Needs support for complex multiplication. */
-
-// /* '*' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (*:result)
-// for (i = 0; i < n; i++)
-// result *= array[i];
-//
-// /* Verify the reduction. */
-// for (i = 0; i < n; i++)
-// vresult *= array[i];
-//
-// if (fabs(result - vresult) > .0001)
-// abort ();
-// result = 0;
-// vresult = 0;
-
-// /* 'max' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result > array[i] ? result : array[i];
-//
-// /* Verify the reduction. */
-// for (i = 0; i < n; i++)
-// vresult = vresult > array[i] ? vresult : array[i];
-//
-// printf("%d != %d\n", result, vresult);
-// if (result != vresult)
-// abort ();
-//
-// result = 0;
-// vresult = 0;
-//
-// /* 'min' reductions. */
-// #pragma acc parallel vector_length (vl)
-// #pragma acc loop reduction (+:result)
-// for (i = 0; i < n; i++)
-// result = result < array[i] ? result : array[i];
-//
-// /* Verify the reduction. */
-// for (i = 0; i < n; i++)
-// vresult = vresult < array[i] ? vresult : array[i];
-//
-// printf("%d != %d\n", result, vresult);
-// if (result != vresult)
-// abort ();
-
- result = 5;
- vresult = 5;
-
- lresult = false;
- lvresult = false;
-
/* '&&' reductions. */
#pragma acc parallel vector_length (vl) copy(lresult)
#pragma acc loop reduction (&&:lresult)
/* Double float has 53 bits of fraction. */
#define FRAC (1.0 / (1LL << 48))
+typedef double _Complex Type;
-int close_enough (double _Complex a, double _Complex b)
+int close_enough (Type a, Type b)
{
- double _Complex diff = a - b;
+ Type diff = a - b;
double mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
double mag2_diff = (__real__(diff) * __real__ (diff)
+ __imag__ (diff) * __imag__ (diff));
#define N 100
static int __attribute__ ((noinline))
-vector (double _Complex ary[N], double _Complex sum, double _Complex prod)
+vector (Type ary[N], Type sum, Type prod)
{
- double _Complex tsum = 0, tprod = 1;
+ Type tsum = 0, tprod = 1;
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
{
}
static int __attribute__ ((noinline))
-worker (double _Complex ary[N], double _Complex sum, double _Complex prod)
+worker (Type ary[N], Type sum, Type prod)
{
- double _Complex tsum = 0, tprod = 1;
+ Type tsum = 0, tprod = 1;
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
{
}
static int __attribute__ ((noinline))
-gang (double _Complex ary[N], double _Complex sum, double _Complex prod)
+gang (Type ary[N], Type sum, Type prod)
{
- double _Complex tsum = 0, tprod = 1;
+ Type tsum = 0, tprod = 1;
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
{
int main (void)
{
- double _Complex ary[N], sum = 0, prod = 1;
+ Type ary[N], sum = 0, prod = 1;
for (int ix = 0; ix < N; ix++)
{
/* Single float has 23 bits of fraction. */
#define FRAC (1.0f / (1 << 20))
+typedef float _Complex Type;
-int close_enough (float _Complex a, float _Complex b)
+int close_enough (Type a, Type b)
{
- float _Complex diff = a - b;
+ Type diff = a - b;
float mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
float mag2_diff = (__real__(diff) * __real__ (diff)
+ __imag__ (diff) * __imag__ (diff));
#define N 100
static int __attribute__ ((noinline))
-vector (float _Complex ary[N], float _Complex sum, float _Complex prod)
+vector (Type ary[N], Type sum, Type prod)
{
- float _Complex tsum = 0, tprod = 1;
+ Type tsum = 0, tprod = 1;
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
{
}
static int __attribute__ ((noinline))
-worker (float _Complex ary[N], float _Complex sum, float _Complex prod)
+worker (Type ary[N], Type sum, Type prod)
{
- float _Complex tsum = 0, tprod = 1;
+ Type tsum = 0, tprod = 1;
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
{
}
static int __attribute__ ((noinline))
-gang (float _Complex ary[N], float _Complex sum, float _Complex prod)
+gang (Type ary[N], Type sum, Type prod)
{
- float _Complex tsum = 0, tprod = 1;
+ Type tsum = 0, tprod = 1;
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
{
int main (void)
{
- float _Complex ary[N], sum = 0, prod = 1;
+ Type ary[N], sum = 0, prod = 1;
for (int ix = 0; ix < N; ix++)
{
--- /dev/null
+
+/* Double float has 52 bits of fraction (53 bits of precision counting the
+   implicit leading bit). */
+#define FRAC (1.0 / (1LL << 48))
+typedef double Type;
+
+int close_enough (Type a, Type b)
+{
+ Type diff = a - b;
+ if (diff < 0)
+ diff = -diff;
+
+ return diff / a < FRAC;
+}
+
+#define N 100
+/* Check '+' and '*' reductions on a vector-partitioned loop.  Reduces ARY
+   inside a parallel region declared with vector_length(32) and returns 1
+   if either result fails close_enough against SUM or PROD, else 0.  */
+static int __attribute__ ((noinline))
+vector (Type ary[N], Type sum, Type prod)
+{
+ Type tsum = 0, tprod = 1;
+ /* The accumulators start at 0 and 1 and are named in the copy clause.  */
+#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+ {
+#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
+ for (int ix = 0; ix < N; ix++)
+ {
+ tsum += ary[ix];
+ tprod *= ary[ix];
+ }
+ }
+ /* Compare each reduced value with the expected one passed in.  */
+ if (!close_enough (sum, tsum))
+ return 1;
+
+ if (!close_enough (prod, tprod))
+ return 1;
+
+ return 0;
+}
+/* Check '+' and '*' reductions on a worker-partitioned loop.  Reduces ARY
+   inside a parallel region declared with num_workers(32) and returns 1
+   if either result fails close_enough against SUM or PROD, else 0.  */
+static int __attribute__ ((noinline))
+worker (Type ary[N], Type sum, Type prod)
+{
+ Type tsum = 0, tprod = 1;
+ /* The accumulators start at 0 and 1 and are named in the copy clause.  */
+#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+ {
+#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
+ for (int ix = 0; ix < N; ix++)
+ {
+ tsum += ary[ix];
+ tprod *= ary[ix];
+ }
+ }
+ /* Compare each reduced value with the expected one passed in.  */
+ if (!close_enough (sum, tsum))
+ return 1;
+
+ if (!close_enough (prod, tprod))
+ return 1;
+
+ return 0;
+}
+/* Check '+' and '*' reductions on a gang-partitioned loop.  Reduces ARY
+   inside a parallel region declared with num_gangs (32) and returns 1
+   if either result fails close_enough against SUM or PROD, else 0.  */
+static int __attribute__ ((noinline))
+gang (Type ary[N], Type sum, Type prod)
+{
+ Type tsum = 0, tprod = 1;
+ /* The accumulators start at 0 and 1 and are named in the copy clause.  */
+#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+ {
+#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
+ for (int ix = 0; ix < N; ix++)
+ {
+ tsum += ary[ix];
+ tprod *= ary[ix];
+ }
+ }
+ /* Compare each reduced value with the expected one passed in.  */
+ if (!close_enough (sum, tsum))
+ return 1;
+
+ if (!close_enough (prod, tprod))
+ return 1;
+
+ return 0;
+}
+
+/* Fill the input array, accumulate the expected sum and product on the
+   host, then run the vector, worker and gang checks in that order.
+   Exits with 1 as soon as one of them reports a mismatch, else 0.  */
+int main (void)
+{
+ Type ary[N];
+ Type sum = 0;
+ Type prod = 1;
+ int idx;
+
+ for (idx = 0; idx < N; idx++)
+ {
+ /* Element value is 1 + idx/1024, computed in single precision.  */
+ float value = idx * (1.0f / 1024) + 1.0f;
+
+ ary[idx] = value;
+ sum += ary[idx];
+ prod *= ary[idx];
+ }
+
+ /* '||' stops at the first failing level and yields exactly 0 or 1.  */
+ return (vector (ary, sum, prod)
+ || worker (ary, sum, prod)
+ || gang (ary, sum, prod));
+}
--- /dev/null
+
+/* Single float has 23 bits of fraction. */
+#define FRAC (1.0f / (1 << 20))
+typedef float Type;
+
+int close_enough (Type a, Type b)
+{
+ Type diff = a - b;
+ if (diff < 0)
+ diff = -diff;
+
+ return diff / a < FRAC;
+}
+
+#define N 100
+/* Check '+' and '*' reductions on a vector-partitioned loop.  Reduces ARY
+   inside a parallel region declared with vector_length(32) and returns 1
+   if either result fails close_enough against SUM or PROD, else 0.  */
+static int __attribute__ ((noinline))
+vector (Type ary[N], Type sum, Type prod)
+{
+ Type tsum = 0, tprod = 1;
+ /* The accumulators start at 0 and 1 and are named in the copy clause.  */
+#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
+ {
+#pragma acc loop vector reduction(+:tsum) reduction (*:tprod)
+ for (int ix = 0; ix < N; ix++)
+ {
+ tsum += ary[ix];
+ tprod *= ary[ix];
+ }
+ }
+ /* Compare each reduced value with the expected one passed in.  */
+ if (!close_enough (sum, tsum))
+ return 1;
+
+ if (!close_enough (prod, tprod))
+ return 1;
+
+ return 0;
+}
+/* Check '+' and '*' reductions on a worker-partitioned loop.  Reduces ARY
+   inside a parallel region declared with num_workers(32) and returns 1
+   if either result fails close_enough against SUM or PROD, else 0.  */
+static int __attribute__ ((noinline))
+worker (Type ary[N], Type sum, Type prod)
+{
+ Type tsum = 0, tprod = 1;
+ /* The accumulators start at 0 and 1 and are named in the copy clause.  */
+#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
+ {
+#pragma acc loop worker reduction(+:tsum) reduction (*:tprod)
+ for (int ix = 0; ix < N; ix++)
+ {
+ tsum += ary[ix];
+ tprod *= ary[ix];
+ }
+ }
+ /* Compare each reduced value with the expected one passed in.  */
+ if (!close_enough (sum, tsum))
+ return 1;
+
+ if (!close_enough (prod, tprod))
+ return 1;
+
+ return 0;
+}
+/* Check '+' and '*' reductions on a gang-partitioned loop.  Reduces ARY
+   inside a parallel region declared with num_gangs (32) and returns 1
+   if either result fails close_enough against SUM or PROD, else 0.  */
+static int __attribute__ ((noinline))
+gang (Type ary[N], Type sum, Type prod)
+{
+ Type tsum = 0, tprod = 1;
+ /* The accumulators start at 0 and 1 and are named in the copy clause.  */
+#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
+ {
+#pragma acc loop gang reduction(+:tsum) reduction (*:tprod)
+ for (int ix = 0; ix < N; ix++)
+ {
+ tsum += ary[ix];
+ tprod *= ary[ix];
+ }
+ }
+ /* Compare each reduced value with the expected one passed in.  */
+ if (!close_enough (sum, tsum))
+ return 1;
+
+ if (!close_enough (prod, tprod))
+ return 1;
+
+ return 0;
+}
+
+/* Fill the input array, accumulate the expected sum and product on the
+   host, then run the vector, worker and gang checks in that order.
+   Exits with 1 as soon as one of them reports a mismatch, else 0.  */
+int main (void)
+{
+ Type ary[N];
+ Type sum = 0;
+ Type prod = 1;
+ int idx;
+
+ for (idx = 0; idx < N; idx++)
+ {
+ /* Element value is 1 + idx/1024, computed in single precision.  */
+ float value = idx * (1.0f / 1024) + 1.0f;
+
+ ary[idx] = value;
+ sum += ary[idx];
+ prod *= ary[idx];
+ }
+
+ /* '||' stops at the first failing level and yields exactly 0 or 1.  */
+ return (vector (ary, sum, prod)
+ || worker (ary, sum, prod)
+ || gang (ary, sum, prod));
+}