plugin-nvptx.c (struct targ_fn_descriptor): Move later.

author Nathan Sidwell <nathan@codesourcery.com>

Mon, 20 Jul 2015 16:17:57 +0000 (16:17 +0000)

committer Nathan Sidwell <nathan@gcc.gnu.org>

Mon, 20 Jul 2015 16:17:57 +0000 (16:17 +0000)
author Nathan Sidwell <nathan@codesourcery.com>
Mon, 20 Jul 2015 16:17:57 +0000 (16:17 +0000)
committer Nathan Sidwell <nathan@gcc.gnu.org>
Mon, 20 Jul 2015 16:17:57 +0000 (16:17 +0000)
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog

index 8f2334a2815933e133609254e39cbd2f13460bb6..6c7b9421320881918b293087c66273232cbb3bb1 100644 (file)
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,15 @@
+2015-07-20  Nathan Sidwell  <nathan@codesourcery.com>
+
+       * plugin/plugin-nvptx.c (struct targ_fn_descriptor): Move later.
+       (struct ptx_image_data): Move earlier, add fns field.
+       (struct ptx_device): Add images and image_lock fields.
+       (ptx_images, ptx_image_lock): Delete.
+       (nvptx_open_device): Initialize images and image_lock fields.
+       (nvptx_close_device): Destroy image_lock.
+       (GOMP_OFFLOAD_load_image): Register image to device-specific fields.
+       (GOMP_OFFLOAD_unload_image): Unregister image from device-specific
+       fields.
+
  2015-07-17  Nathan Sidwell  <nathan@codesourcery.com>
  
         * target.c (GOMP_offload_register): Use int for device type arg.
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c

index ba6bf0111f38853818c01c2e17716c4b06500aac..fc296321812f76add1cdf1db9bb2f56e34066487 100644 (file)
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -127,12 +127,6 @@ cuda_error (CUresult r)
    return &errmsg[0];
  }
  
-struct targ_fn_descriptor
-{
-  CUfunction fn;
-  const char *name;
-};
-
  static unsigned int instantiated_devices = 0;
  static pthread_mutex_t ptx_dev_lock = PTHREAD_MUTEX_INITIALIZER;
  
@@ -288,6 +282,25 @@ map_push (struct ptx_stream *s, int async, size_t size, void **h, void **d)
    return;
  }
  
+/* Descriptor of a loaded function.  */
+
+struct targ_fn_descriptor
+{
+  CUfunction fn;
+  const char *name;
+};
+
+/* A loaded PTX image.  */
+struct ptx_image_data
+{
+  const void *target_data;
+  CUmodule module;
+
+  struct targ_fn_descriptor *fns;  /* Array of functions.  */
+  
+  struct ptx_image_data *next;
+};
+
  struct ptx_device
  {
    CUcontext ctx;
@@ -311,6 +324,9 @@ struct ptx_device
    int  mode;
    bool mkern;
  
+  struct ptx_image_data *images;  /* Images loaded on device.  */
+  pthread_mutex_t image_lock;     /* Lock for above list.  */
+  
    struct ptx_device *next;
  };
  
@@ -332,21 +348,11 @@ struct ptx_event
    struct ptx_event *next;
  };
  
-struct ptx_image_data
-{
-  const void *target_data;
-  CUmodule module;
-  struct ptx_image_data *next;
-};
-
  static pthread_mutex_t ptx_event_lock;
  static struct ptx_event *ptx_events;
  
  static struct ptx_device **ptx_devices;
  
-static struct ptx_image_data *ptx_images = NULL;
-static pthread_mutex_t ptx_image_lock = PTHREAD_MUTEX_INITIALIZER;
-
  #define _XSTR(s) _STR(s)
  #define _STR(s) #s
  
@@ -590,6 +596,7 @@ select_stream_for_async (int async, pthread_t thread, bool create,
  
  /* Initialize the device.  Return TRUE on success, else FALSE.  PTX_DEV_LOCK
     should be locked on entry and remains locked on exit.  */
+
  static bool
  nvptx_init (void)
  {
@@ -746,6 +753,9 @@ nvptx_open_device (int n)
    if (r != CUDA_SUCCESS)
      async_engines = 1;
  
+  ptx_dev->images = NULL;
+  pthread_mutex_init (&ptx_dev->image_lock, NULL);
+
    init_streams_for_device (ptx_dev, async_engines);
  
    return ptx_dev;
@@ -760,6 +770,8 @@ nvptx_close_device (struct ptx_device *ptx_dev)
      return;
  
    fini_streams_for_device (ptx_dev);
+  
+  pthread_mutex_destroy (&ptx_dev->image_lock);
  
    if (!ptx_dev->ctx_shared)
      {
@@ -1632,6 +1644,9 @@ typedef struct nvptx_tdata
    size_t fn_num;
  } nvptx_tdata_t;
  
+/* Load the (partial) program described by TARGET_DATA to device number
+   ORD.  Allocate *TARGET_TABLE and return its number of entries.  */
+
  int
  GOMP_OFFLOAD_load_image (int ord, const void *target_data,
                          struct addr_pair **target_table)
@@ -1641,23 +1656,19 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
    unsigned int fn_entries, var_entries, i, j;
    CUresult r;
    struct targ_fn_descriptor *targ_fns;
+  struct addr_pair *targ_tbl;
    const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
    struct ptx_image_data *new_image;
+  struct ptx_device *dev;
  
    GOMP_OFFLOAD_init_device (ord);
  
+  dev = ptx_devices[ord];
+  
    nvptx_attach_host_thread_to_device (ord);
  
    link_ptx (&module, img_header->ptx_src);
  
-  pthread_mutex_lock (&ptx_image_lock);
-  new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
-  new_image->target_data = target_data;
-  new_image->module = module;
-  new_image->next = ptx_images;
-  ptx_images = new_image;
-  pthread_mutex_unlock (&ptx_image_lock);
-
    /* The mkoffload utility emits a struct of pointers/integers at the
       start of each offload image.  The array of kernel names and the
       functions addresses form a one-to-one correspondence.  */
@@ -1667,12 +1678,24 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
    fn_entries = img_header->fn_num;
    fn_names = img_header->fn_names;
  
-  *target_table = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
-                                     * (fn_entries + var_entries));
+  targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
+                                * (fn_entries + var_entries));
    targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor)
                                  * fn_entries);
  
-  for (i = 0; i < fn_entries; i++)
+  *target_table = targ_tbl;
+
+  new_image = GOMP_PLUGIN_malloc (sizeof (struct ptx_image_data));
+  new_image->target_data = target_data;
+  new_image->module = module;
+  new_image->fns = targ_fns;
+
+  pthread_mutex_lock (&dev->image_lock);
+  new_image->next = dev->images;
+  dev->images = new_image;
+  pthread_mutex_unlock (&dev->image_lock);
+
+  for (i = 0; i < fn_entries; i++, targ_fns++, targ_tbl++)
      {
        CUfunction function;
  
@@ -1680,14 +1703,14 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
        if (r != CUDA_SUCCESS)
         GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r));
  
-      targ_fns[i].fn = function;
-      targ_fns[i].name = (const char *) fn_names[i];
+      targ_fns->fn = function;
+      targ_fns->name = (const char *) fn_names[i];
  
-      (*target_table)[i].start = (uintptr_t) &targ_fns[i];
-      (*target_table)[i].end = (*target_table)[i].start + 1;
+      targ_tbl->start = (uintptr_t) targ_fns;
+      targ_tbl->end = targ_tbl->start + 1;
      }
  
-  for (j = 0; j < var_entries; j++, i++)
+  for (j = 0; j < var_entries; j++, targ_tbl++)
      {
        CUdeviceptr var;
        size_t bytes;
@@ -1696,47 +1719,33 @@ GOMP_OFFLOAD_load_image (int ord, const void *target_data,
        if (r != CUDA_SUCCESS)
          GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r));
  
-      (*target_table)[i].start = (uintptr_t) var;
-      (*target_table)[i].end = (*target_table)[i].start + bytes;
+      targ_tbl->start = (uintptr_t) var;
+      targ_tbl->end = targ_tbl->start + bytes;
      }
  
-  return i;
+  return fn_entries + var_entries;
  }
  
-void
-GOMP_OFFLOAD_unload_image (int tid __attribute__((unused)),
-                          const void *target_data)
-{
-  const void *const *img_header = (const void *const *) target_data;
-  struct targ_fn_descriptor *targ_fns
-    = (struct targ_fn_descriptor *) img_header[0];
-  struct ptx_image_data *image, *prev = NULL, *newhd = NULL;
-
-  free (targ_fns);
-
-  pthread_mutex_lock (&ptx_image_lock);
-  for (image = ptx_images; image != NULL;)
-    {
-      struct ptx_image_data *next = image->next;
-
-      if (image->target_data == target_data)
-       {
-         cuModuleUnload (image->module);
-         free (image);
-         if (prev)
-           prev->next = next;
-       }
-      else
-       {
-         prev = image;
-         if (!newhd)
-           newhd = image;
-       }
+/* Unload the program described by TARGET_DATA from device number ORD,
+   freeing the function descriptors allocated by GOMP_OFFLOAD_load_image.  */
  
-      image = next;
-    }
-  ptx_images = newhd;
-  pthread_mutex_unlock (&ptx_image_lock);
+void
+GOMP_OFFLOAD_unload_image (int ord, const void *target_data)
+{
+  struct ptx_image_data *image, **prev_p;
+  struct ptx_device *dev = ptx_devices[ord];
+
+  pthread_mutex_lock (&dev->image_lock);
+  for (prev_p = &dev->images; (image = *prev_p) != 0; prev_p = &image->next)
+    if (image->target_data == target_data)
+      {
+       *prev_p = image->next;
+       cuModuleUnload (image->module);
+       free (image->fns);
+       free (image);
+       break;
+      }
+  pthread_mutex_unlock (&dev->image_lock);
  }
  
  void *
author	Nathan Sidwell <nathan@codesourcery.com>
	Mon, 20 Jul 2015 16:17:57 +0000 (16:17 +0000)
committer	Nathan Sidwell <nathan@gcc.gnu.org>
	Mon, 20 Jul 2015 16:17:57 +0000 (16:17 +0000)
libgomp/ChangeLog		patch \| blob \| history
libgomp/plugin/plugin-nvptx.c		patch \| blob \| history