return &errmsg[0];
}
-struct targ_fn_descriptor
-{
- CUfunction fn;
- const char *name;
-};
-
static unsigned int instantiated_devices = 0;
static pthread_mutex_t ptx_dev_lock = PTHREAD_MUTEX_INITIALIZER;
return;
}
+/* Descriptor of a loaded function. */
+
+struct targ_fn_descriptor
+{
+ CUfunction fn; /* Function handle stored by GOMP_OFFLOAD_load_image. */
+ const char *name; /* Name taken from the image header's fn_names array. */
+};
+
+/* A loaded PTX image. */
+struct ptx_image_data
+{
+ const void *target_data; /* TARGET_DATA given to GOMP_OFFLOAD_load_image; the key matched by GOMP_OFFLOAD_unload_image. */
+ CUmodule module; /* Module filled in by link_ptx; passed to cuModuleUnload on unload. */
+
+ struct targ_fn_descriptor *fns; /* Array of function descriptors; freed on unload. */
+
+ struct ptx_image_data *next; /* Next image in the owning device's list. */
+};
+
struct ptx_device
{
CUcontext ctx;
int mode;
bool mkern;
+ struct ptx_image_data *images; /* Images loaded on device. */
+ pthread_mutex_t image_lock; /* Lock for above list. */
+
struct ptx_device *next;
};
struct ptx_event *next;
};
-struct ptx_image_data
-{
- const void *target_data;
- CUmodule module;
- struct ptx_image_data *next;
-};
-
static pthread_mutex_t ptx_event_lock;
static struct ptx_event *ptx_events;
static struct ptx_device **ptx_devices;
-static struct ptx_image_data *ptx_images = NULL;
-static pthread_mutex_t ptx_image_lock = PTHREAD_MUTEX_INITIALIZER;
-
#define _XSTR(s) _STR(s)
#define _STR(s) #s
/* Initialize the device. Return TRUE on success, else FALSE. PTX_DEV_LOCK
should be locked on entry and remains locked on exit. */
+
static bool
nvptx_init (void)
{
if (r != CUDA_SUCCESS)
async_engines = 1;
+ ptx_dev->images = NULL;
+ pthread_mutex_init (&ptx_dev->image_lock, NULL);
+
init_streams_for_device (ptx_dev, async_engines);
return ptx_dev;
return;
fini_streams_for_device (ptx_dev);
+
+ pthread_mutex_destroy (&ptx_dev->image_lock);
if (!ptx_dev->ctx_shared)
{
size_t fn_num;
} nvptx_tdata_t;
+/* Load the (partial) program described by TARGET_DATA to device
+ number ORD. Allocate the table, store it in *TARGET_TABLE, and return the number of entries (functions plus variables) in it. */
+
int
GOMP_OFFLOAD_load_image (int ord, const void *target_data,
struct addr_pair **target_table)
unsigned int fn_entries, var_entries, i, j;
CUresult r;
struct targ_fn_descriptor *targ_fns;
+ struct addr_pair *targ_tbl;
const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
struct ptx_image_data *new_image;
+ struct ptx_device *dev;
GOMP_OFFLOAD_init_device (ord);
+ dev = ptx_devices[ord];
+
nvptx_attach_host_thread_to_device (ord);
link_ptx (&module, img_header->ptx_src);
- pthread_mutex_lock (&ptx_image_lock);
- new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
- new_image->target_data = target_data;
- new_image->module = module;
- new_image->next = ptx_images;
- ptx_images = new_image;
- pthread_mutex_unlock (&ptx_image_lock);
-
/* The mkoffload utility emits a struct of pointers/integers at the
start of each offload image. The array of kernel names and the
functions addresses form a one-to-one correspondence. */
fn_entries = img_header->fn_num;
fn_names = img_header->fn_names;
- *target_table = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
- * (fn_entries + var_entries));
+ targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
+ * (fn_entries + var_entries));
targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor)
* fn_entries);
- for (i = 0; i < fn_entries; i++)
+ *target_table = targ_tbl;
+
+ new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
+ new_image->target_data = target_data;
+ new_image->module = module;
+ new_image->fns = targ_fns;
+
+ pthread_mutex_lock (&dev->image_lock);
+ new_image->next = dev->images;
+ dev->images = new_image;
+ pthread_mutex_unlock (&dev->image_lock);
+
+ for (i = 0; i < fn_entries; i++, targ_fns++, targ_tbl++)
{
CUfunction function;
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r));
- targ_fns[i].fn = function;
- targ_fns[i].name = (const char *) fn_names[i];
+ targ_fns->fn = function;
+ targ_fns->name = (const char *) fn_names[i];
- (*target_table)[i].start = (uintptr_t) &targ_fns[i];
- (*target_table)[i].end = (*target_table)[i].start + 1;
+ targ_tbl->start = (uintptr_t) targ_fns;
+ targ_tbl->end = targ_tbl->start + 1;
}
- for (j = 0; j < var_entries; j++, i++)
+ for (j = 0; j < var_entries; j++, targ_tbl++)
{
CUdeviceptr var;
size_t bytes;
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r));
- (*target_table)[i].start = (uintptr_t) var;
- (*target_table)[i].end = (*target_table)[i].start + bytes;
+ targ_tbl->start = (uintptr_t) var;
+ targ_tbl->end = targ_tbl->start + bytes;
}
- return i;
+ return fn_entries + var_entries;
}
-void
-GOMP_OFFLOAD_unload_image (int tid __attribute__((unused)),
- const void *target_data)
-{
- const void *const *img_header = (const void *const *) target_data;
- struct targ_fn_descriptor *targ_fns
- = (struct targ_fn_descriptor *) img_header[0];
- struct ptx_image_data *image, *prev = NULL, *newhd = NULL;
-
- free (targ_fns);
-
- pthread_mutex_lock (&ptx_image_lock);
- for (image = ptx_images; image != NULL;)
- {
- struct ptx_image_data *next = image->next;
-
- if (image->target_data == target_data)
- {
- cuModuleUnload (image->module);
- free (image);
- if (prev)
- prev->next = next;
- }
- else
- {
- prev = image;
- if (!newhd)
- newhd = image;
- }
+/* Unload the program described by TARGET_DATA from device number ORD,
+ releasing its module and the function descriptors allocated by GOMP_OFFLOAD_load_image. */
- image = next;
- }
- ptx_images = newhd;
- pthread_mutex_unlock (&ptx_image_lock);
+void
+GOMP_OFFLOAD_unload_image (int ord, const void *target_data)
+{
+ struct ptx_image_data *image, **prev_p;
+ struct ptx_device *dev = ptx_devices[ord]; /* ORD is used as an index without a range check here. */
+
+ pthread_mutex_lock (&dev->image_lock); /* Guards the per-device image list. */
+ for (prev_p = &dev->images; (image = *prev_p) != 0; prev_p = &image->next) /* PREV_P always addresses the link that points at IMAGE. */
+ if (image->target_data == target_data)
+ {
+ *prev_p = image->next; /* Unlink IMAGE before releasing it. */
+ cuModuleUnload (image->module); /* Return value is not checked. */
+ free (image->fns); /* Descriptor array recorded by GOMP_OFFLOAD_load_image. */
+ free (image);
+ break; /* Only the first matching image is removed. */
+ }
+ pthread_mutex_unlock (&dev->image_lock);
+}
void *