From b6adbb9faabb776ae7b70a5f5943ae883b1f76ea Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Mon, 1 Feb 2016 16:20:13 +0000 Subject: [PATCH] nvptx.c (PTX_GANG_DEFAULT): New. gcc/ * config/nvptx/nvptx.c (PTX_GANG_DEFAULT): New. (nvptx_goacc_validate_dims): Extend to handle global defaults. * target.def (OACC_VALIDATE_DIMS): Extend documentation. * doc/tm.texti: Rebuilt. * doc/invoke.texi (fopenacc-dim): Document. * lto-wrapper.c (merge_and_complain): Add OPT_fopenacc_dim_ case. (append_compiler_options): Likewise. * omp-low.c (oacc_default_dims, oacc_min_dims): New. (oacc_parse_default_dims): New. (oacc_validate_dims): Add USED arg. Select non-unity default when possible. (oacc_loop_fixed_partitions): Return mask of used partitions. (oacc_loop_auto_partitions): Emit dump info. (oacc_loop_partition): Return mask of used partitions. (execute_oacc_device_lower): Parse default dimension arg. Adjust loop partitioning and validation calls. gcc/c-family/ * c.opt (fopenacc-dim=): New option. gcc/fortran/ * lang.opt (fopenacc-dim=): New option. libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: New. * testsuite/libgomp.oacc-fortran/routine-7.f90: Serialize loop. From-SVN: r233041 --- gcc/ChangeLog | 19 +++ gcc/c-family/ChangeLog | 4 + gcc/c-family/c.opt | 4 + gcc/config/nvptx/nvptx.c | 23 ++- gcc/doc/invoke.texi | 10 +- gcc/doc/tm.texi | 9 +- gcc/fortran/ChangeLog | 4 + gcc/fortran/lang.opt | 4 + gcc/lto-wrapper.c | 16 +- gcc/omp-low.c | 152 +++++++++++++++--- gcc/target.def | 9 +- libgomp/ChangeLog | 5 + .../loop-dim-default.c | 133 +++++++++++++++ .../libgomp.oacc-fortran/routine-7.f90 | 4 +- 14 files changed, 353 insertions(+), 43 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e17d3c54fb8..cb6d8bdfc64 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2016-02-01 Nathan Sidwell + + * config/nvptx/nvptx.c (PTX_GANG_DEFAULT): New. + (nvptx_goacc_validate_dims): Extend to handle global defaults. + * target.def (OACC_VALIDATE_DIMS): Extend documentation. + * doc/tm.texti: Rebuilt. + * doc/invoke.texi (fopenacc-dim): Document. + * lto-wrapper.c (merge_and_complain): Add OPT_fopenacc_dim_ case. + (append_compiler_options): Likewise. + * omp-low.c (oacc_default_dims, oacc_min_dims): New. + (oacc_parse_default_dims): New. + (oacc_validate_dims): Add USED arg. Select non-unity default when + possible. + (oacc_loop_fixed_partitions): Return mask of used partitions. + (oacc_loop_auto_partitions): Emit dump info. + (oacc_loop_partition): Return mask of used partitions. + (execute_oacc_device_lower): Parse default dimension arg. Adjust + loop partitioning and validation calls. + 2016-02-01 Richard Biener PR middle-end/69556 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 8c869896130..5bd7d3ba691 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,7 @@ +2016-02-01 Nathan Sidwell + + * c.opt (fopenacc-dim=): New option. + 2016-01-27 Ryan Burn PR cilkplus/69267 diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 2b6b52d761b..f243744a978 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -1372,6 +1372,10 @@ fopenacc C ObjC C++ ObjC++ LTO Var(flag_openacc) Enable OpenACC. +fopenacc-dim= +C ObjC C++ ObjC++ LTO Joined Var(flag_openacc_dims) +Specify default OpenACC compute dimensions. + fopenmp C ObjC C++ ObjC++ LTO Var(flag_openmp) Enable OpenMP (implies -frecursive in Fortran). diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 2d4dad1e172..1dadfc57fec 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -4122,10 +4122,12 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget), /* Define dimension sizes for known hardware. */ #define PTX_VECTOR_LENGTH 32 #define PTX_WORKER_LENGTH 32 +#define PTX_GANG_DEFAULT 32 /* Validate compute dimensions of an OpenACC offload or routine, fill in non-unity defaults. FN_LEVEL indicates the level at which a - routine might spawn a loop. It is negative for non-routines. */ + routine might spawn a loop. It is negative for non-routines. If + DECL is null, we are validating the default dimensions. */ static bool nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level) @@ -4133,11 +4135,12 @@ nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level) bool changed = false; /* The vector size must be 32, unless this is a SEQ routine. */ - if (fn_level <= GOMP_DIM_VECTOR + if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1 + && dims[GOMP_DIM_VECTOR] >= 0 && dims[GOMP_DIM_VECTOR] != PTX_VECTOR_LENGTH) { - if (dims[GOMP_DIM_VECTOR] >= 0 && fn_level < 0) - warning_at (DECL_SOURCE_LOCATION (decl), 0, + if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0) + warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION, 0, dims[GOMP_DIM_VECTOR] ? "using vector_length (%d), ignoring %d" : "using vector_length (%d), ignoring runtime setting", @@ -4149,13 +4152,23 @@ nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level) /* Check the num workers is not too large. */ if (dims[GOMP_DIM_WORKER] > PTX_WORKER_LENGTH) { - warning_at (DECL_SOURCE_LOCATION (decl), 0, + warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION, 0, "using num_workers (%d), ignoring %d", PTX_WORKER_LENGTH, dims[GOMP_DIM_WORKER]); dims[GOMP_DIM_WORKER] = PTX_WORKER_LENGTH; changed = true; } + if (!decl) + { + dims[GOMP_DIM_VECTOR] = PTX_VECTOR_LENGTH; + if (dims[GOMP_DIM_WORKER] < 0) + dims[GOMP_DIM_WORKER] = PTX_WORKER_LENGTH; + if (dims[GOMP_DIM_GANG] < 0) + dims[GOMP_DIM_GANG] = PTX_GANG_DEFAULT; + changed = true; + } + return changed; } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index ba0b4b2cd56..fcc404e8b2b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1963,9 +1963,13 @@ Programming Interface v2.0 @w{@uref{http://www.openacc.org/}}. This option implies @option{-pthread}, and thus is only supported on targets that have support for @option{-pthread}. -Note that this is an experimental feature, incomplete, and subject to -change in future versions of GCC. See -@w{@uref{https://gcc.gnu.org/wiki/OpenACC}} for more information. +@item -fopenacc-dim=@var{geom} +@opindex fopenacc-dim +@cindex OpenACC accelerator programming +Specify default compute dimensions for parallel offload regions that do +not explicitly specify. The @var{geom} value is a triple of +':'-separated sizes, in order 'gang', 'worker' and, 'vector'. A size +can be omitted, to use a target-specific default value. @item -fopenmp @opindex fopenmp diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index aae09bfc82f..2392691e29c 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5767,11 +5767,12 @@ to use it. @deftypefn {Target Hook} bool TARGET_GOACC_VALIDATE_DIMS (tree @var{decl}, int *@var{dims}, int @var{fn_level}) This hook should check the launch dimensions provided for an OpenACC compute region, or routine. Defaulted values are represented as -1 -and non-constant values as 0. The @var{fn_level} is negative for the +and non-constant values as 0. The @var{fn_level} is negative for the function corresponding to the compute region. For a routine is is the -outermost level at which partitioned execution may be spawned. It -should fill in anything that needs to default to non-unity and verify -non-defaults. Diagnostics should be issued as appropriate. Return +outermost level at which partitioned execution may be spawned. The hook +should verify non-default values. If DECL is NULL, global defaults +are being validated and unspecified defaults should be filled in. +Diagnostics should be issued as appropriate. Return true, if changes have been made. You must override this hook to provide dimensions larger than 1. @end deftypefn diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index ccc29c1d39d..36b4ddb7d5a 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,7 @@ +2016-02-02 Nathan Sidwell + + * lang.opt (fopenacc-dim=): New option. + 2016-01-31 Paul Thomas PR fortran/67564 diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt index f368ab847c5..45428d8cf41 100644 --- a/gcc/fortran/lang.opt +++ b/gcc/fortran/lang.opt @@ -578,6 +578,10 @@ fopenacc Fortran LTO ; Documented in C +fopenacc-dim= +Fortran LTO Joined Var(flag_openacc_dims) +; Documented in C + fopenmp Fortran LTO ; Documented in C diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c index e636b8b54be..ced6f2f9cff 100644 --- a/gcc/lto-wrapper.c +++ b/gcc/lto-wrapper.c @@ -287,12 +287,25 @@ merge_and_complain (struct cl_decoded_option **decoded_options, append_option (decoded_options, decoded_options_count, foption); /* -fmath-errno > -fno-math-errno, -fsigned-zeros > -fno-signed-zeros, - -ftrapping-math -> -fno-trapping-math, + -ftrapping-math > -fno-trapping-math, -fwrapv > -fno-wrapv. */ else if (foption->value > (*decoded_options)[j].value) (*decoded_options)[j] = *foption; break; + case OPT_fopenacc_dim_: + /* Append or check identical. */ + for (j = 0; j < *decoded_options_count; ++j) + if ((*decoded_options)[j].opt_index == foption->opt_index) + break; + if (j == *decoded_options_count) + append_option (decoded_options, decoded_options_count, foption); + else if (strcmp ((*decoded_options)[j].arg, foption->arg)) + fatal_error (input_location, + "Option %s with different values", + foption->orig_option_with_args_text); + break; + case OPT_freg_struct_return: case OPT_fpcc_struct_return: case OPT_fshort_double: @@ -506,6 +519,7 @@ append_compiler_options (obstack *argv_obstack, struct cl_decoded_option *opts, case OPT_fwrapv: case OPT_fopenmp: case OPT_fopenacc: + case OPT_fopenacc_dim_: case OPT_fcilkplus: case OPT_ftrapv: case OPT_fstrict_overflow: diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 677ad64e7e3..ec4b4b55458 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -20238,13 +20238,80 @@ oacc_xform_loop (gcall *call) gsi_replace_with_seq (&gsi, seq, true); } +/* Default partitioned and minimum partitioned dimensions. */ + +static int oacc_default_dims[GOMP_DIM_MAX]; +static int oacc_min_dims[GOMP_DIM_MAX]; + +/* Parse the default dimension parameter. This is a set of + :-separated optional compute dimensions. Each specified dimension + is a positive integer. When device type support is added, it is + planned to be a comma separated list of such compute dimensions, + with all but the first prefixed by the colon-terminated device + type. */ + +static void +oacc_parse_default_dims (const char *dims) +{ + int ix; + + for (ix = GOMP_DIM_MAX; ix--;) + { + oacc_default_dims[ix] = -1; + oacc_min_dims[ix] = 1; + } + +#ifndef ACCEL_COMPILER + /* Cannot be overridden on the host. */ + dims = NULL; +#endif + if (dims) + { + const char *pos = dims; + + for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++) + { + if (ix) + { + if (*pos != ':') + goto malformed; + pos++; + } + + if (*pos != ':') + { + long val; + const char *eptr; + + errno = 0; + val = strtol (pos, CONST_CAST (char **, &eptr), 10); + if (errno || val <= 0 || (unsigned)val != val) + goto malformed; + pos = eptr; + oacc_default_dims[ix] = (int)val; + } + } + if (*pos) + { + malformed: + error_at (UNKNOWN_LOCATION, + "-fopenacc-dim operand is malformed at '%s'", pos); + } + } + + /* Allow the backend to validate the dimensions. */ + targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1); + targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2); +} + /* Validate and update the dimensions for offloaded FN. ATTRS is the raw attribute. DIMS is an array of dimensions, which is filled in. LEVEL is the partitioning level of a routine, or -1 for an offload - region itself. */ + region itself. USED is the mask of partitioned execution in the + function. */ static void -oacc_validate_dims (tree fn, tree attrs, int *dims, int level) +oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used) { tree purpose[GOMP_DIM_MAX]; unsigned ix; @@ -20265,11 +20332,29 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level) bool changed = targetm.goacc.validate_dims (fn, dims, level); - /* Default anything left to 1. */ + /* Default anything left to 1 or a partitioned default. */ for (ix = 0; ix != GOMP_DIM_MAX; ix++) if (dims[ix] < 0) { - dims[ix] = 1; + /* The OpenACC spec says 'If the [num_gangs] clause is not + specified, an implementation-defined default will be used; + the default may depend on the code within the construct.' + (2.5.6). Thus an implementation is free to choose + non-unity default for a parallel region that doesn't have + any gang-partitioned loops. However, it appears that there + is a sufficient body of user code that expects non-gang + partitioned regions to not execute in gang-redundant mode. + So we (a) don't warn about the non-portability and (b) pick + the minimum permissible dimension size when there is no + partitioned execution. Otherwise we pick the global + default for the dimension, which the user can control. The + same wording and logic applies to num_workers and + vector_length, however the worker- or vector- single + execution doesn't have the same impact as gang-redundant + execution. (If the minimum gang-level partioning is not 1, + the target is probably too confusing.) */ + dims[ix] = (used & GOMP_DIM_MASK (ix) + ? oacc_default_dims[ix] : oacc_min_dims[ix]); changed = true; } @@ -20719,14 +20804,15 @@ oacc_loop_process (oacc_loop *loop) /* Walk the OpenACC loop heirarchy checking and assigning the programmer-specified partitionings. OUTER_MASK is the partitioning - this loop is contained within. Return true if we contain an - auto-partitionable loop. */ + this loop is contained within. Return mask of partitioning + encountered. If any auto loops are discovered, set GOMP_DIM_MAX + bit. */ -static bool +static unsigned oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) { unsigned this_mask = loop->mask; - bool has_auto = false; + unsigned mask_all = 0; bool noisy = true; #ifdef ACCEL_COMPILER @@ -20760,7 +20846,7 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) } } if (auto_par && (loop->flags & OLF_INDEPENDENT)) - has_auto = true; + mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX); } if (this_mask & outer_mask) @@ -20814,16 +20900,16 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) } loop->mask = this_mask; + mask_all |= this_mask; + + if (loop->child) + mask_all |= oacc_loop_fixed_partitions (loop->child, + outer_mask | this_mask); - if (loop->child - && oacc_loop_fixed_partitions (loop->child, outer_mask | this_mask)) - has_auto = true; - - if (loop->sibling - && oacc_loop_fixed_partitions (loop->sibling, outer_mask)) - has_auto = true; + if (loop->sibling) + mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask); - return has_auto; + return mask_all; } /* Walk the OpenACC loop heirarchy to assign auto-partitioned loops. @@ -20865,6 +20951,11 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) warning_at (loop->loc, 0, "insufficient partitioning available to parallelize loop"); + if (dump_file) + fprintf (dump_file, "Auto loop %s:%d assigned %d\n", + LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc), + this_mask); + loop->mask = this_mask; } inner_mask |= loop->mask; @@ -20876,13 +20967,19 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) } /* Walk the OpenACC loop heirarchy to check and assign partitioning - axes. */ + axes. Return mask of partitioning. */ -static void +static unsigned oacc_loop_partition (oacc_loop *loop, unsigned outer_mask) { - if (oacc_loop_fixed_partitions (loop, outer_mask)) - oacc_loop_auto_partitions (loop, outer_mask); + unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask); + + if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX)) + { + mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX); + mask_all |= oacc_loop_auto_partitions (loop, outer_mask); + } + return mask_all; } /* Default fork/join early expander. Delete the function calls if @@ -20958,6 +21055,13 @@ execute_oacc_device_lower () /* Not an offloaded function. */ return 0; + /* Parse the default dim argument exactly once. */ + if ((const void *)flag_openacc_dims != &flag_openacc_dims) + { + oacc_parse_default_dims (flag_openacc_dims); + flag_openacc_dims = (char *)&flag_openacc_dims; + } + /* Discover, partition and process the loops. */ oacc_loop *loops = oacc_loop_discovery (); int fn_level = oacc_fn_attrib_level (attrs); @@ -20969,10 +21073,10 @@ execute_oacc_device_lower () : "Function is routine level %d\n", fn_level); unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0; - oacc_loop_partition (loops, outer_mask); - + unsigned used_mask = oacc_loop_partition (loops, outer_mask); int dims[GOMP_DIM_MAX]; - oacc_validate_dims (current_function_decl, attrs, dims, fn_level); + + oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask); if (dump_file) { diff --git a/gcc/target.def b/gcc/target.def index d60319e455e..fa0af67475f 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1648,11 +1648,12 @@ DEFHOOK (validate_dims, "This hook should check the launch dimensions provided for an OpenACC\n\ compute region, or routine. Defaulted values are represented as -1\n\ -and non-constant values as 0. The @var{fn_level} is negative for the\n\ +and non-constant values as 0. The @var{fn_level} is negative for the\n\ function corresponding to the compute region. For a routine is is the\n\ -outermost level at which partitioned execution may be spawned. It\n\ -should fill in anything that needs to default to non-unity and verify\n\ -non-defaults. Diagnostics should be issued as appropriate. Return\n\ +outermost level at which partitioned execution may be spawned. The hook\n\ +should verify non-default values. If DECL is NULL, global defaults\n\ +are being validated and unspecified defaults should be filled in.\n\ +Diagnostics should be issued as appropriate. Return\n\ true, if changes have been made. You must override this hook to\n\ provide dimensions larger than 1.", bool, (tree decl, int *dims, int fn_level), diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 9c09ac50cba..c94fa277475 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,8 @@ +2016-02-01 Nathan Sidwell + + * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: New. + * testsuite/libgomp.oacc-fortran/routine-7.f90: Serialize loop. + 2016-01-26 Tom de Vries PR tree-optimization/69110 diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c new file mode 100644 index 00000000000..36b882ff330 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c @@ -0,0 +1,133 @@ + +/* { dg-additional-options "-O2 -fopenacc-dim=16:16" } */ + +#include +#include +#include +#include + +#pragma acc routine +static int __attribute__ ((noinline)) coord () +{ + int res = 0; + + if (acc_on_device (acc_device_nvidia)) + { + int g = 0, w = 0, v = 0; + + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); + __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); + __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + res = (1 << 24) | (g << 16) | (w << 8) | v; + } + return res; +} + + +int check (const int *ary, int size, int gp, int wp, int vp) +{ + int exit = 0; + int ix; + int *gangs = (int *)alloca (gp * sizeof (int)); + int *workers = (int *)alloca (wp * sizeof (int)); + int *vectors = (int *)alloca (vp * sizeof (int)); + int offloaded = 0; + + memset (gangs, 0, gp * sizeof (int)); + memset (workers, 0, wp * sizeof (int)); + memset (vectors, 0, vp * sizeof (int)); + + for (ix = 0; ix < size; ix++) + { + int g = (ary[ix] >> 16) & 0xff; + int w = (ary[ix] >> 8) & 0xff; + int v = (ary[ix] >> 0) & 0xff; + + if (g >= gp || w >= wp || v >= vp) + { + printf ("unexpected cpu %#x used\n", ary[ix]); + exit = 1; + } + else + { + vectors[v]++; + workers[w]++; + gangs[g]++; + } + offloaded += ary[ix] >> 24; + } + + if (!offloaded) + return 0; + + if (offloaded != size) + { + printf ("offloaded %d times, expected %d\n", offloaded, size); + return 1; + } + + for (ix = 0; ix < gp; ix++) + if (gangs[ix] != gangs[0]) + { + printf ("gang %d not used %d times\n", ix, gangs[0]); + exit = 1; + } + + for (ix = 0; ix < wp; ix++) + if (workers[ix] != workers[0]) + { + printf ("worker %d not used %d times\n", ix, workers[0]); + exit = 1; + } + + for (ix = 0; ix < vp; ix++) + if (vectors[ix] != vectors[0]) + { + printf ("vector %d not used %d times\n", ix, vectors[0]); + exit = 1; + } + + return exit; +} + +#define N (32 *32*32) + +int test_1 (int gp, int wp, int vp) +{ + int ary[N]; + int exit = 0; + +#pragma acc parallel copyout (ary) + { +#pragma acc loop gang (static:1) + for (int ix = 0; ix < N; ix++) + ary[ix] = coord (); + } + + exit |= check (ary, N, gp, 1, 1); + +#pragma acc parallel copyout (ary) + { +#pragma acc loop worker + for (int ix = 0; ix < N; ix++) + ary[ix] = coord (); + } + + exit |= check (ary, N, 1, wp, 1); + +#pragma acc parallel copyout (ary) + { +#pragma acc loop vector + for (int ix = 0; ix < N; ix++) + ary[ix] = coord (); + } + + exit |= check (ary, N, 1, 1, vp); + + return exit; +} + +int main () +{ + return test_1 (16, 16, 32); +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90 index 7fc81691bfb..200188ec051 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90 @@ -41,7 +41,7 @@ program main end do !$acc parallel copy (b) - !$acc loop + !$acc loop seq do i = 1, N call worker (b) end do @@ -56,7 +56,7 @@ program main end do !$acc parallel copy (a) - !$acc loop + !$acc loop seq do i = 1, N call vector (a) end do -- 2.30.2