+2018-07-30 Tom de Vries <tdevries@suse.de>
+
+ * plugin/plugin-nvptx.c (MIN, MAX): Redefine.
+ (nvptx_exec): Ensure worker and vector default dims don't exceed
+ targ_fn->max_threads_per_block.
+
2018-07-30 Tom de Vries <tdevries@suse.de>
* plugin/plugin-nvptx.c (struct ptx_device): Add default_dims field.
#include "secure_getenv.h"
+#undef MIN
+#undef MAX
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
+
/* Convenience macros for the frequently used CUDA library call and
error handling sequence as well as CUDA library calls that
do the error checking themselves or don't do it at all. */
void *kargs[1];
void *hp, *dp;
struct nvptx_thread *nvthd = nvptx_thread ();
+ int warp_size = nvthd->ptx_dev->warp_size;
const char *maybe_abort_msg = "(perhaps abort was called)";
function = targ_fn->fn;
int gang, worker, vector;
{
- int warp_size = nvthd->ptx_dev->warp_size;
int block_size = nvthd->ptx_dev->max_threads_per_block;
int cpu_size = nvthd->ptx_dev->max_threads_per_multiprocessor;
int dev_size = nvthd->ptx_dev->num_sms;
}
pthread_mutex_unlock (&ptx_dev_lock);
- for (i = 0; i != GOMP_DIM_MAX; i++)
- if (!dims[i])
- dims[i] = nvthd->ptx_dev->default_dims[i];
+ {
+ bool default_dim_p[GOMP_DIM_MAX];
+ for (i = 0; i != GOMP_DIM_MAX; i++)
+ {
+ default_dim_p[i] = !dims[i];
+ if (default_dim_p[i])
+ dims[i] = nvthd->ptx_dev->default_dims[i];
+ }
+
+ if (default_dim_p[GOMP_DIM_VECTOR])
+ dims[GOMP_DIM_VECTOR]
+ = MIN (dims[GOMP_DIM_VECTOR],
+ (targ_fn->max_threads_per_block / warp_size * warp_size));
+
+ if (default_dim_p[GOMP_DIM_WORKER])
+ dims[GOMP_DIM_WORKER]
+ = MIN (dims[GOMP_DIM_WORKER],
+ targ_fn->max_threads_per_block / dims[GOMP_DIM_VECTOR]);
+ }
}
/* Check if the accelerator has sufficient hardware resources to