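Attaching an attribute to every outlined OpenACC compute region lets
'execute_oacc_device_lower' classify each offloaded function explicitly
(parallel, kernels, serial, or routine) instead of treating 'parallel' as
the fallback case.  This also allows for making further things more
explicit, later on.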
gcc/
* omp-expand.c (expand_omp_target): Attach an attribute to all
outlined OpenACC compute regions.
* omp-offload.c (execute_oacc_device_lower): Adjust.
gcc/testsuite/
* c-c++-common/goacc/classify-parallel.c: Adjust.
* gfortran.dg/goacc/classify-parallel.f95: Likewise.
* c-c++-common/goacc/classify-serial.c: New.
* gfortran.dg/goacc/classify-serial.f95: Likewise.
entry_bb = region->entry;
exit_bb = region->exit;
+ if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
+ mark_loops_in_oacc_kernels_region (region->entry, region->exit);
+
+ /* From here on, all OpenACC compute constructs are mapped to
+ 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
+ To distinguish between them, we attach attributes. */
switch (target_kind)
{
+ case GF_OMP_TARGET_KIND_OACC_PARALLEL:
+ DECL_ATTRIBUTES (child_fn)
+ = tree_cons (get_identifier ("oacc parallel"),
+ NULL_TREE, DECL_ATTRIBUTES (child_fn));
+ break;
case GF_OMP_TARGET_KIND_OACC_KERNELS:
- mark_loops_in_oacc_kernels_region (region->entry, region->exit);
-
- /* Further down, all OpenACC compute constructs will be mapped to
- BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
- is an "oacc kernels" attribute set for OpenACC kernels. */
DECL_ATTRIBUTES (child_fn)
= tree_cons (get_identifier ("oacc kernels"),
NULL_TREE, DECL_ATTRIBUTES (child_fn));
break;
case GF_OMP_TARGET_KIND_OACC_SERIAL:
- /* Further down, all OpenACC compute constructs will be mapped to
- BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
- is an "oacc serial" attribute set for OpenACC serial. */
DECL_ATTRIBUTES (child_fn)
= tree_cons (get_identifier ("oacc serial"),
NULL_TREE, DECL_ATTRIBUTES (child_fn));
break;
default:
+ /* Make sure we don't miss any. */
+ gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
+ && is_gimple_omp_offloaded (entry_stmt)));
break;
}
flag_openacc_dims = (char *)&flag_openacc_dims;
}
+ bool is_oacc_parallel
+ = (lookup_attribute ("oacc parallel",
+ DECL_ATTRIBUTES (current_function_decl)) != NULL);
bool is_oacc_kernels
= (lookup_attribute ("oacc kernels",
DECL_ATTRIBUTES (current_function_decl)) != NULL);
+ bool is_oacc_serial
+ = (lookup_attribute ("oacc serial",
+ DECL_ATTRIBUTES (current_function_decl)) != NULL);
+ int fn_level = oacc_fn_attrib_level (attrs);
+ bool is_oacc_routine = (fn_level >= 0);
+ gcc_checking_assert (is_oacc_parallel
+ + is_oacc_kernels
+ + is_oacc_serial
+ + is_oacc_routine
+ == 1);
+
bool is_oacc_kernels_parallelized
= (lookup_attribute ("oacc kernels parallelized",
DECL_ATTRIBUTES (current_function_decl)) != NULL);
+ if (is_oacc_kernels_parallelized)
+ gcc_checking_assert (is_oacc_kernels);
+
+ if (dump_file)
+ {
+ if (is_oacc_parallel)
+ fprintf (dump_file, "Function is OpenACC parallel offload\n");
+ else if (is_oacc_kernels)
+ fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
+ (is_oacc_kernels_parallelized
+ ? "parallelized" : "unparallelized"));
+ else if (is_oacc_serial)
+ fprintf (dump_file, "Function is OpenACC serial offload\n");
+ else if (is_oacc_routine)
+ fprintf (dump_file, "Function is OpenACC routine level %d\n",
+ fn_level);
+ else
+ gcc_unreachable ();
+ }
/* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
kernels, so remove the parallelism dimensions function attributes
/* Discover, partition and process the loops. */
oacc_loop *loops = oacc_loop_discovery ();
- int fn_level = oacc_fn_attrib_level (attrs);
-
- if (dump_file)
- {
- if (fn_level >= 0)
- fprintf (dump_file, "Function is OpenACC routine level %d\n",
- fn_level);
- else if (is_oacc_kernels)
- fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
- (is_oacc_kernels_parallelized
- ? "parallelized" : "unparallelized"));
- else
- fprintf (dump_file, "Function is OpenACC parallel offload\n");
- }
- unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
+ unsigned outer_mask = 0;
+ if (is_oacc_routine)
+ outer_mask = GOMP_DIM_MASK (fn_level) - 1;
unsigned used_mask = oacc_loop_partition (loops, outer_mask);
/* OpenACC kernels constructs are special: they currently don't use the
generic oacc_loop infrastructure and attribute/dimension processing. */
}
/* Check the offloaded function's attributes.
- { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } } */
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint\\)\\)" 1 "ompexp" } } */
/* Check the offloaded function's classification and compute dimensions (will
always be 1 x 1 x 1 for non-offloading compilation).
{ dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccdevlow" } }
{ dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
- { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
--- /dev/null
+/* Check offloaded function's attributes and classification for OpenACC
+ serial. */
+
+/* { dg-additional-options "-O2" }
+ { dg-additional-options "-fopt-info-optimized-omp" }
+ { dg-additional-options "-fdump-tree-ompexp" }
+ { dg-additional-options "-fdump-tree-oaccdevlow" } */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+void SERIAL ()
+{
+#pragma acc serial loop copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang vector loop parallelism" } */
+ for (unsigned int i = 0; i < N; i++)
+ c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+ always be 1 x 1 x 1 for non-offloading compilation).
+ { dg-final { scan-tree-dump-times "(?n)Function is OpenACC serial offload" 1 "oaccdevlow" } }
+ { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint\\)\\)" 1 "oaccdevlow" } } */
end program main
! Check the offloaded function's attributes.
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target entrypoint\\)\\)" 1 "ompexp" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc parallel, omp target entrypoint\\)\\)" 1 "ompexp" } }
! Check the offloaded function's classification and compute dimensions (will
! always be 1 x 1 x 1 for non-offloading compilation).
! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC parallel offload" 1 "oaccdevlow" } }
! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
-! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc parallel, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }
--- /dev/null
+! Check offloaded function's attributes and classification for OpenACC
+! serial.
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fopt-info-optimized-omp" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-oaccdevlow" }
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i
+
+ call setup(a, b)
+
+ !$acc serial loop copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang vector loop parallelism" }
+ do i = 0, n - 1
+ c(i) = a(i) + b(i)
+ end do
+ !$acc end serial loop
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc serial, omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is OpenACC serial offload" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccdevlow" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc serial, omp target entrypoint\\)\\)" 1 "oaccdevlow" } }