From f3e9a059a74680325960aa3e6ebc629773c9e1f8 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Mon, 20 Jul 2015 16:17:57 +0000 Subject: [PATCH] plugin-nvptx.c (struct targ_fn_descriptor): Move later. * plugin/plugin-nvptx.c (struct targ_fn_descriptor): Move later. (struct ptx_image_data): Move earlier, add fns field. (struct ptx_device): Add images and image_lock fields. (ptx_images, ptx_image_lock): Delete. (nvptx_open_device): Initialize images and image_lock fields. (nvptx_close_device): Destroy image_lock. (GOMP_OFFLOAD_load_image): Register image to device-specific fields. (GOMP_OFFLOAD_unload_image): Unregister image from device-specific fields. From-SVN: r226004 --- libgomp/ChangeLog | 12 +++ libgomp/plugin/plugin-nvptx.c | 145 ++++++++++++++++++---------------- 2 files changed, 89 insertions(+), 68 deletions(-) diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 8f2334a2815..6c7b9421320 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,15 @@ +2015-07-20 Nathan Sidwell + + * plugin/plugin-nvptx.c (struct targ_fn_descriptor): Move later. + (struct ptx_image_data): Move earlier, add fns field. + (struct ptx_device): Add images and image_lock fields. + (ptx_images, ptx_image_lock): Delete. + (nvptx_open_device): Initialize images and image_lock fields. + (nvptx_close_device): Destroy image_lock. + (GOMP_OFFLOAD_load_image): Register image to device-specific fields. + (GOMP_OFFLOAD_unload_image): Unregister image from device-specific + fields. + 2015-07-17 Nathan Sidwell * target.c (GOMP_offload_register): Use int for device type arg. 
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index ba6bf0111f3..fc296321812 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -127,12 +127,6 @@ cuda_error (CUresult r) return &errmsg[0]; } -struct targ_fn_descriptor -{ - CUfunction fn; - const char *name; -}; - static unsigned int instantiated_devices = 0; static pthread_mutex_t ptx_dev_lock = PTHREAD_MUTEX_INITIALIZER; @@ -288,6 +282,25 @@ map_push (struct ptx_stream *s, int async, size_t size, void **h, void **d) return; } +/* Descriptor of a loaded function. */ + +struct targ_fn_descriptor +{ + CUfunction fn; + const char *name; +}; + +/* A loaded PTX image. */ +struct ptx_image_data +{ + const void *target_data; + CUmodule module; + + struct targ_fn_descriptor *fns; /* Array of functions. */ + + struct ptx_image_data *next; +}; + struct ptx_device { CUcontext ctx; @@ -311,6 +324,9 @@ struct ptx_device int mode; bool mkern; + struct ptx_image_data *images; /* Images loaded on device. */ + pthread_mutex_t image_lock; /* Lock for above list. */ + struct ptx_device *next; }; @@ -332,21 +348,11 @@ struct ptx_event struct ptx_event *next; }; -struct ptx_image_data -{ - const void *target_data; - CUmodule module; - struct ptx_image_data *next; -}; - static pthread_mutex_t ptx_event_lock; static struct ptx_event *ptx_events; static struct ptx_device **ptx_devices; -static struct ptx_image_data *ptx_images = NULL; -static pthread_mutex_t ptx_image_lock = PTHREAD_MUTEX_INITIALIZER; - #define _XSTR(s) _STR(s) #define _STR(s) #s @@ -590,6 +596,7 @@ select_stream_for_async (int async, pthread_t thread, bool create, /* Initialize the device. Return TRUE on success, else FALSE. PTX_DEV_LOCK should be locked on entry and remains locked on exit. 
*/ + static bool nvptx_init (void) { @@ -746,6 +753,9 @@ nvptx_open_device (int n) if (r != CUDA_SUCCESS) async_engines = 1; + ptx_dev->images = NULL; + pthread_mutex_init (&ptx_dev->image_lock, NULL); + init_streams_for_device (ptx_dev, async_engines); return ptx_dev; @@ -760,6 +770,8 @@ nvptx_close_device (struct ptx_device *ptx_dev) return; fini_streams_for_device (ptx_dev); + + pthread_mutex_destroy (&ptx_dev->image_lock); if (!ptx_dev->ctx_shared) { @@ -1632,6 +1644,9 @@ typedef struct nvptx_tdata size_t fn_num; } nvptx_tdata_t; +/* Load the (partial) program described by TARGET_DATA to device + number ORD. Allocate and return TARGET_TABLE. */ + int GOMP_OFFLOAD_load_image (int ord, const void *target_data, struct addr_pair **target_table) @@ -1641,23 +1656,19 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data, unsigned int fn_entries, var_entries, i, j; CUresult r; struct targ_fn_descriptor *targ_fns; + struct addr_pair *targ_tbl; const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data; struct ptx_image_data *new_image; + struct ptx_device *dev; GOMP_OFFLOAD_init_device (ord); + dev = ptx_devices[ord]; + nvptx_attach_host_thread_to_device (ord); link_ptx (&module, img_header->ptx_src); - pthread_mutex_lock (&ptx_image_lock); - new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data)); - new_image->target_data = target_data; - new_image->module = module; - new_image->next = ptx_images; - ptx_images = new_image; - pthread_mutex_unlock (&ptx_image_lock); - /* The mkoffload utility emits a struct of pointers/integers at the start of each offload image. The array of kernel names and the functions addresses form a one-to-one correspondence. 
*/ @@ -1667,12 +1678,24 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data, fn_entries = img_header->fn_num; fn_names = img_header->fn_names; - *target_table = GOMP_PLUGIN_malloc (sizeof (struct addr_pair) - * (fn_entries + var_entries)); + targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair) + * (fn_entries + var_entries)); targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor) * fn_entries); - for (i = 0; i < fn_entries; i++) + *target_table = targ_tbl; + + new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data)); + new_image->target_data = target_data; + new_image->module = module; + new_image->fns = targ_fns; + + pthread_mutex_lock (&dev->image_lock); + new_image->next = dev->images; + dev->images = new_image; + pthread_mutex_unlock (&dev->image_lock); + + for (i = 0; i < fn_entries; i++, targ_fns++, targ_tbl++) { CUfunction function; @@ -1680,14 +1703,14 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data, if (r != CUDA_SUCCESS) GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r)); - targ_fns[i].fn = function; - targ_fns[i].name = (const char *) fn_names[i]; + targ_fns->fn = function; + targ_fns->name = (const char *) fn_names[i]; - (*target_table)[i].start = (uintptr_t) &targ_fns[i]; - (*target_table)[i].end = (*target_table)[i].start + 1; + targ_tbl->start = (uintptr_t) targ_fns; + targ_tbl->end = targ_tbl->start + 1; } - for (j = 0; j < var_entries; j++, i++) + for (j = 0; j < var_entries; j++, targ_tbl++) { CUdeviceptr var; size_t bytes; @@ -1696,47 +1719,33 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data, if (r != CUDA_SUCCESS) GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r)); - (*target_table)[i].start = (uintptr_t) var; - (*target_table)[i].end = (*target_table)[i].start + bytes; + targ_tbl->start = (uintptr_t) var; + targ_tbl->end = targ_tbl->start + bytes; } - return i; + return fn_entries + var_entries; } -void -GOMP_OFFLOAD_unload_image (int tid 
__attribute__((unused)), - const void *target_data) -{ - const void *const *img_header = (const void *const *) target_data; - struct targ_fn_descriptor *targ_fns - = (struct targ_fn_descriptor *) img_header[0]; - struct ptx_image_data *image, *prev = NULL, *newhd = NULL; - - free (targ_fns); - - pthread_mutex_lock (&ptx_image_lock); - for (image = ptx_images; image != NULL;) - { - struct ptx_image_data *next = image->next; - - if (image->target_data == target_data) - { - cuModuleUnload (image->module); - free (image); - if (prev) - prev->next = next; - } - else - { - prev = image; - if (!newhd) - newhd = image; - } +/* Unload the image described by TARGET_DATA from device number ORD: unlink + it from the per-device image list, unload its module and free its data. */ - image = next; - } - ptx_images = newhd; - pthread_mutex_unlock (&ptx_image_lock); +void +GOMP_OFFLOAD_unload_image (int ord, const void *target_data) +{ + struct ptx_image_data *image, **prev_p; + struct ptx_device *dev = ptx_devices[ord]; + + pthread_mutex_lock (&dev->image_lock); + for (prev_p = &dev->images; (image = *prev_p) != 0; prev_p = &image->next) + if (image->target_data == target_data) + { + *prev_p = image->next; + cuModuleUnload (image->module); + free (image->fns); + free (image); + break; + } + pthread_mutex_unlock (&dev->image_lock); } void * -- 2.30.2