With this patch, when GOMP_DEBUG=1 is set and a kernel is launched in
GOMP_OFFLOAD_run (used by the OpenMP implementation), the kernel launch
dimensions are reported:
...
GOMP_OFFLOAD_run: kernel main$_omp_fn$0: \
launch [(teams: 1), 1, 1] [(lanes: 32), (threads: 1), 1]
...
Built on x86_64-linux with nvptx accelerator, tested libgomp.
libgomp/ChangeLog:
2020-10-08 Tom de Vries <tdevries@suse.de>
PR libgomp/81802
* plugin/plugin-nvptx.c (GOMP_OFFLOAD_run): Report launch
dimensions.
void
GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
{
void
GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
{
- CUfunction function = ((struct targ_fn_descriptor *) tgt_fn)->fn;
+ struct targ_fn_descriptor *tgt_fn_desc
+ = (struct targ_fn_descriptor *) tgt_fn;
+ CUfunction function = tgt_fn_desc->fn;
+ const struct targ_fn_launch *launch = tgt_fn_desc->launch;
+ const char *fn_name = launch->fn;
CUresult r;
struct ptx_device *ptx_dev = ptx_devices[ord];
const char *maybe_abort_msg = "(perhaps abort was called)";
CUresult r;
struct ptx_device *ptx_dev = ptx_devices[ord];
const char *maybe_abort_msg = "(perhaps abort was called)";
CU_LAUNCH_PARAM_BUFFER_SIZE, &fn_args_size,
CU_LAUNCH_PARAM_END
};
CU_LAUNCH_PARAM_BUFFER_SIZE, &fn_args_size,
CU_LAUNCH_PARAM_END
};
+ GOMP_PLUGIN_debug (0, " %s: kernel %s: launch"
+ " [(teams: %u), 1, 1] [(lanes: 32), (threads: %u), 1]\n",
+ __FUNCTION__, fn_name, teams, threads);
r = CUDA_CALL_NOCHECK (cuLaunchKernel, function, teams, 1, 1,
32, threads, 1, 0, NULL, NULL, config);
if (r != CUDA_SUCCESS)
r = CUDA_CALL_NOCHECK (cuLaunchKernel, function, teams, 1, 1,
32, threads, 1, 0, NULL, NULL, config);
if (r != CUDA_SUCCESS)