+2016-05-26 Chung-Lin Tang <cltang@codesourcery.com>
+
+ * target.c (gomp_device_copy): New function.
+ (gomp_copy_host2dev): Likewise.
+ (gomp_copy_dev2host): Likewise.
+ (gomp_free_device_memory): Likewise.
+ (gomp_map_vars_existing): Adjust to call gomp_copy_host2dev.
+ (gomp_map_pointer): Likewise.
+ (gomp_map_vars): Adjust to call gomp_copy_host2dev, handle
+ NULL value from alloc_func plugin hook.
+ (gomp_unmap_tgt): Adjust to call gomp_free_device_memory.
+ (gomp_copy_from_async): Adjust to call gomp_copy_dev2host.
+ (gomp_unmap_vars): Likewise.
+ (gomp_update): Adjust to call gomp_copy_dev2host and
+ gomp_copy_host2dev functions.
+ (gomp_unload_image_from_device): Handle false value from
+ unload_image_func plugin hook.
+ (gomp_init_device): Handle false value from init_device_func
+ plugin hook.
+ (gomp_exit_data): Adjust to call gomp_copy_dev2host.
+ (omp_target_free): Adjust to call gomp_free_device_memory.
+ (omp_target_memcpy): Handle return values from host2dev_func,
+ dev2host_func, and dev2dev_func plugin hooks.
+ (omp_target_memcpy_rect_worker): Likewise.
+ (gomp_target_fini): Handle false value from fini_device_func
+ plugin hook.
+ * libgomp.h (struct gomp_device_descr): Adjust return type of
+ init_device_func, fini_device_func, unload_image_func, free_func,
+ dev2host_func, host2dev_func, and dev2dev_func plugin hooks to 'bool'.
+ * oacc-init.c (acc_shutdown_1): Handle false value from
+ fini_device_func plugin hook.
+ * oacc-host.c (host_init_device): Change return type to bool.
+ (host_fini_device): Likewise.
+ (host_unload_image): Likewise.
+ (host_free): Likewise.
+ (host_dev2host): Likewise.
+ (host_host2dev): Likewise.
+ * oacc-mem.c (acc_free): Handle plugin hook fatal error case.
+ (acc_memcpy_to_device): Likewise.
+ (acc_memcpy_from_device): Likewise.
+ (delete_copyout): Add libfnname parameter, handle free_func
+ hook fatal error case.
+ (acc_delete): Adjust delete_copyout call.
+ (acc_copyout): Likewise.
+ (update_dev_host): Move gomp_mutex_unlock to after
+ host2dev/dev2host hook calls.
+
+ * plugin/plugin-hsa.c (hsa_warn): Rename local variable 'hsa_error'
+ to 'hsa_error_msg' for clarity.
+ (hsa_fatal): Likewise.
+ (hsa_error): New function.
+ (init_hsa_context): Change return type to bool, adjust to return
+ false on error.
+ (GOMP_OFFLOAD_get_num_devices): Adjust to handle init_hsa_context
+ return value.
+ (GOMP_OFFLOAD_init_device): Change return type to bool, adjust to
+ return false on error.
+ (get_agent_info): Adjust to return NULL on error.
+ (destroy_hsa_program): Change return type to bool, adjust to
+ return false on error.
+ (GOMP_OFFLOAD_load_image): Adjust to return -1 on error.
+ (destroy_module): Make static, change return type to bool, adjust
+ to return false on error.
+ (GOMP_OFFLOAD_unload_image): Likewise.
+ (GOMP_OFFLOAD_fini_device): Likewise.
+ (GOMP_OFFLOAD_alloc): Change to return NULL when called.
+ (GOMP_OFFLOAD_free): Change to return false when called.
+ (GOMP_OFFLOAD_dev2host): Likewise.
+ (GOMP_OFFLOAD_host2dev): Likewise.
+ (GOMP_OFFLOAD_dev2dev): Likewise.
+
+ * plugin/plugin-nvptx.c (CUDA_CALL_ERET): New convenience macro.
+ (CUDA_CALL): Likewise.
+ (CUDA_CALL_ASSERT): Likewise.
+ (map_init): Change return type to bool, use CUDA_CALL* macros.
+ (map_fini): Likewise.
+ (init_streams_for_device): Change return type to bool, adjust
+ call to map_init.
+ (fini_streams_for_device): Change return type to bool, adjust
+ call to map_fini.
+ (select_stream_for_async): Release stream_lock before calls to
+ GOMP_PLUGIN_fatal, adjust call to map_init.
+ (nvptx_init): Use CUDA_CALL* macros.
+ (nvptx_attach_host_thread_to_device): Change return type to bool,
+ use CUDA_CALL* macros.
+ (nvptx_open_device): Use CUDA_CALL* macros.
+ (nvptx_close_device): Change return type to bool, use CUDA_CALL*
+ macros.
+ (nvptx_get_num_devices): Use CUDA_CALL* macros.
+ (link_ptx): Change return type to bool, use CUDA_CALL* macros.
+ (nvptx_exec): Use CUDA_CALL* macros.
+ (nvptx_alloc): Use CUDA_CALL* macros.
+ (nvptx_free): Change return type to bool, use CUDA_CALL* macros.
+ (nvptx_host2dev): Likewise.
+ (nvptx_dev2host): Likewise.
+ (nvptx_wait): Use CUDA_CALL* macros.
+ (nvptx_wait_async): Likewise.
+ (nvptx_wait_all): Likewise.
+ (nvptx_wait_all_async): Likewise.
+ (nvptx_set_cuda_stream): Adjust order of stream_lock acquire,
+ use CUDA_CALL* macros, adjust call to map_fini.
+ (GOMP_OFFLOAD_init_device): Change return type to bool,
+ adjust code accordingly.
+ (GOMP_OFFLOAD_fini_device): Likewise.
+ (GOMP_OFFLOAD_load_image): Adjust calls to
+ nvptx_attach_host_thread_to_device/link_ptx to handle errors,
+ use CUDA_CALL* macros.
+ (GOMP_OFFLOAD_unload_image): Change return type to bool, adjust
+ return code.
+ (GOMP_OFFLOAD_alloc): Adjust calls to handle error return.
+ (GOMP_OFFLOAD_free): Change return type to bool, adjust calls to
+ handle error return.
+ (GOMP_OFFLOAD_dev2host): Likewise.
+ (GOMP_OFFLOAD_host2dev): Likewise.
+ (GOMP_OFFLOAD_openacc_register_async_cleanup): Use CUDA_CALL* macros.
+ (GOMP_OFFLOAD_openacc_create_thread_data): Likewise.
+
2016-05-24 Cesar Philippidis <cesar@codesourcery.com>
* oacc-mem.c (acc_malloc): Update handling of shared-memory targets.
unsigned int (*get_caps_func) (void);
int (*get_type_func) (void);
int (*get_num_devices_func) (void);
- void (*init_device_func) (int);
- void (*fini_device_func) (int);
+ bool (*init_device_func) (int);
+ bool (*fini_device_func) (int);
unsigned (*version_func) (void);
int (*load_image_func) (int, unsigned, const void *, struct addr_pair **);
- void (*unload_image_func) (int, unsigned, const void *);
+ bool (*unload_image_func) (int, unsigned, const void *);
void *(*alloc_func) (int, size_t);
- void (*free_func) (int, void *);
- void *(*dev2host_func) (int, void *, const void *, size_t);
- void *(*host2dev_func) (int, void *, const void *, size_t);
- void *(*dev2dev_func) (int, void *, const void *, size_t);
+ bool (*free_func) (int, void *);
+ bool (*dev2host_func) (int, void *, const void *, size_t);
+ bool (*host2dev_func) (int, void *, const void *, size_t);
+ bool (*dev2dev_func) (int, void *, const void *, size_t);
bool (*can_run_func) (void *);
void (*run_func) (int, void *, void *, void **);
void (*async_run_func) (int, void *, void *, void **, void *);
return 1;
}
-static void
+static bool
host_init_device (int n __attribute__ ((unused)))
{
+ return true;
}
-static void
+static bool
host_fini_device (int n __attribute__ ((unused)))
{
+ return true;
}
static unsigned
return 0;
}
-static void
+static bool
host_unload_image (int n __attribute__ ((unused)),
unsigned v __attribute__ ((unused)),
const void *t __attribute__ ((unused)))
{
+ return true;
}
static void *
return gomp_malloc (s);
}
-static void
+static bool
host_free (int n __attribute__ ((unused)), void *p)
{
free (p);
+ return true;
}
-static void *
+static bool
host_dev2host (int n __attribute__ ((unused)),
void *h __attribute__ ((unused)),
const void *d __attribute__ ((unused)),
size_t s __attribute__ ((unused)))
{
- return NULL;
+ return true;
}
-static void *
+static bool
host_host2dev (int n __attribute__ ((unused)),
void *d __attribute__ ((unused)),
const void *h __attribute__ ((unused)),
size_t s __attribute__ ((unused)))
{
- return NULL;
+ return true;
}
static void
gomp_mutex_unlock (&goacc_thread_lock);
-
/* Close all the devices of this type that have been opened. */
+ bool ret = true;
for (i = 0; i < ndevs; i++)
{
struct gomp_device_descr *acc_dev = &base_dev[i];
if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
{
devices_active = true;
- acc_dev->fini_device_func (acc_dev->target_id);
+ ret &= acc_dev->fini_device_func (acc_dev->target_id);
acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
}
gomp_mutex_unlock (&acc_dev->lock);
}
+ if (!ret)
+ gomp_fatal ("device finalization failed");
+
if (!devices_active)
gomp_fatal ("no device initialized");
}
else
gomp_mutex_unlock (&acc_dev->lock);
- acc_dev->free_func (acc_dev->target_id, d);
+ if (!acc_dev->free_func (acc_dev->target_id, d))
+ gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
}
void
return;
}
- thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
+ if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s))
+ gomp_fatal ("error in %s", __FUNCTION__);
}
void
return;
}
- thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
+ if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s))
+ gomp_fatal ("error in %s", __FUNCTION__);
}
/* Return the device pointer that corresponds to host data H. Or NULL
#define FLAG_COPYOUT (1 << 0)
static void
-delete_copyout (unsigned f, void *h, size_t s)
+delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
{
size_t host_size;
splay_tree_key n;
acc_unmap_data (h);
- acc_dev->free_func (acc_dev->target_id, d);
+ if (!acc_dev->free_func (acc_dev->target_id, d))
+ gomp_fatal ("error in freeing device memory in %s", libfnname);
}
void
acc_delete (void *h , size_t s)
{
- delete_copyout (0, h, s);
+ delete_copyout (0, h, s, __FUNCTION__);
}
-void acc_copyout (void *h, size_t s)
+void
+acc_copyout (void *h, size_t s)
{
- delete_copyout (FLAG_COPYOUT, h, s);
+ delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
}
static void
d = (void *) (n->tgt->tgt_start + n->tgt_offset
+ (uintptr_t) h - n->host_start);
- gomp_mutex_unlock (&acc_dev->lock);
-
if (is_dev)
acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
else
acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+
+ gomp_mutex_unlock (&acc_dev->lock);
}
void
if (!debug)
return;
- const char *hsa_error;
- hsa_status_string (status, &hsa_error);
+ const char *hsa_error_msg;
+ hsa_status_string (status, &hsa_error_msg);
- fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error);
+ fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
}
/* Report a fatal error STR together with the HSA error corresponding to STATUS
static void
hsa_fatal (const char *str, hsa_status_t status)
{
- const char *hsa_error;
- hsa_status_string (status, &hsa_error);
+ const char *hsa_error_msg;
+ hsa_status_string (status, &hsa_error_msg);
GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
- hsa_error);
+ hsa_error_msg);
+}
+
+/* Like hsa_fatal, except only report the error message, and return FALSE
+ to propagate error handling out of the plugin. */
+
+static bool
+hsa_error (const char *str, hsa_status_t status)
+{
+ const char *hsa_error_msg;
+ hsa_status_string (status, &hsa_error_msg);
+ GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
+ hsa_error_msg);
+ return false;
}
struct hsa_kernel_description
return HSA_STATUS_SUCCESS;
}
-/* Initialize hsa_context if it has not already been done. */
+/* Initialize hsa_context if it has not already been done.
+ Return TRUE on success. */
-static void
+static bool
init_hsa_context (void)
{
hsa_status_t status;
int agent_index = 0;
if (hsa_context.initialized)
- return;
+ return true;
init_enviroment_variables ();
status = hsa_init ();
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Run-time could not be initialized", status);
+ return hsa_error ("Run-time could not be initialized", status);
HSA_DEBUG ("HSA run-time initialized\n");
status = hsa_iterate_agents (count_gpu_agents, NULL);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("HSA GPU devices could not be enumerated", status);
+ return hsa_error ("HSA GPU devices could not be enumerated", status);
HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
hsa_context.agents
* sizeof (struct agent_info));
status = hsa_iterate_agents (assign_agent_ids, &agent_index);
if (agent_index != hsa_context.agent_count)
- GOMP_PLUGIN_fatal ("Failed to assign IDs to all HSA agents");
+ {
+ GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
+ return false;
+ }
hsa_context.initialized = true;
+ return true;
}
/* Callback of dispatch queues to report errors. */
int
GOMP_OFFLOAD_get_num_devices (void)
{
- init_hsa_context ();
+ if (!init_hsa_context ())
+ return 0;
return hsa_context.agent_count;
}
/* Part of the libgomp plugin interface. Initialize agent number N so that it
- can be used for computation. */
+ can be used for computation. Return TRUE on success. */
-void
+bool
GOMP_OFFLOAD_init_device (int n)
{
- init_hsa_context ();
+ if (!init_hsa_context ())
+ return false;
if (n >= hsa_context.agent_count)
- GOMP_PLUGIN_fatal ("Request to initialize non-existing HSA device %i", n);
+ {
+ GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
+ return false;
+ }
struct agent_info *agent = &hsa_context.agents[n];
if (agent->initialized)
- return;
+ return true;
if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
- GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent rwlock");
+ {
+ GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
+ return false;
+ }
if (pthread_mutex_init (&agent->prog_mutex, NULL))
- GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent program mutex");
+ {
+ GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
+ return false;
+ }
uint32_t queue_size;
hsa_status_t status;
status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE,
&queue_size);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error requesting maximum queue size of the HSA agent", status);
+ return hsa_error ("Error requesting maximum queue size of the HSA agent",
+ status);
status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error querying the ISA of the agent", status);
+ return hsa_error ("Error querying the ISA of the agent", status);
status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
queue_callback, NULL, UINT32_MAX, UINT32_MAX,
&agent->command_q);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error creating command queue", status);
+ return hsa_error ("Error creating command queue", status);
status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI,
queue_callback, NULL, UINT32_MAX, UINT32_MAX,
&agent->kernel_dispatch_command_q);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error creating kernel dispatch command queue", status);
+ return hsa_error ("Error creating kernel dispatch command queue", status);
agent->kernarg_region.handle = (uint64_t) -1;
status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region,
&agent->kernarg_region);
if (agent->kernarg_region.handle == (uint64_t) -1)
- GOMP_PLUGIN_fatal ("Could not find suitable memory region for kernel "
- "arguments");
+ {
+ GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
+ "arguments");
+ return false;
+ }
HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
(long long unsigned) agent->command_q->id);
HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
(long long unsigned) agent->kernel_dispatch_command_q->id);
agent->initialized = true;
+ return true;
}
/* Verify that hsa_context has already been initialized and return the
- agent_info structure describing device number N. */
+ agent_info structure describing device number N. Return NULL on error. */
static struct agent_info *
get_agent_info (int n)
{
if (!hsa_context.initialized)
- GOMP_PLUGIN_fatal ("Attempt to use uninitialized HSA context.");
+ {
+ GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
+ return NULL;
+ }
if (n >= hsa_context.agent_count)
- GOMP_PLUGIN_fatal ("Request to operate on anon-existing HSA device %i", n);
+ {
+ GOMP_PLUGIN_error ("Request to operate on a non-existing HSA device %i", n);
+ return NULL;
+ }
if (!hsa_context.agents[n].initialized)
- GOMP_PLUGIN_fatal ("Attempt to use an uninitialized HSA agent.");
+ {
+ GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
+ return NULL;
+ }
return &hsa_context.agents[n];
}
}
/* Free the HSA program in agent and everything associated with it and set
- agent->prog_finalized and the initialized flags of all kernels to false. */
+ agent->prog_finalized and the initialized flags of all kernels to false.
+ Return TRUE on success. */
-static void
+static bool
destroy_hsa_program (struct agent_info *agent)
{
if (!agent->prog_finalized || agent->prog_finalized_error)
- return;
+ return true;
hsa_status_t status;
status = hsa_executable_destroy (agent->executable);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not destroy HSA executable", status);
+ return hsa_error ("Could not destroy HSA executable", status);
struct module_info *module;
for (module = agent->first_module; module; module = module->next)
module->kernels[i].initialized = false;
}
agent->prog_finalized = false;
+ return true;
}
/* Part of the libgomp plugin interface. Load BRIG module described by struct
struct addr_pair **target_table)
{
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
- GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
- " (expected %u, received %u)",
- GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+ {
+ GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
+ " (expected %u, received %u)",
+ GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+ return -1;
+ }
struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
struct agent_info *agent;
int kernel_count = image_desc->kernel_count;
agent = get_agent_info (ord);
+ if (!agent)
+ return -1;
+
if (pthread_rwlock_wrlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
- if (agent->prog_finalized)
- destroy_hsa_program (agent);
+ {
+ GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
+ return -1;
+ }
+ if (agent->prog_finalized
+ && !destroy_hsa_program (agent))
+ return -1;
HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
kernel->dependencies_count = d->kernel_dependencies_count;
kernel->dependencies = d->kernel_dependencies;
if (pthread_mutex_init (&kernel->init_mutex, NULL))
- GOMP_PLUGIN_fatal ("Failed to initialize an HSA kernel mutex");
+ {
+ GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
+ return -1;
+ }
kernel++;
pair++;
add_module_to_agent (agent, module);
if (pthread_rwlock_unlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+ {
+ GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
+ return -1;
+ }
return kernel_count;
}
}
/* Deinitialize all information associated with MODULE and kernels within
- it. */
+ it. Return TRUE on success. */
-void
+static bool
destroy_module (struct module_info *module)
{
int i;
for (i = 0; i < module->kernel_count; i++)
if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
- GOMP_PLUGIN_fatal ("Failed to destroy an HSA kernel initialization "
- "mutex");
+ {
+ GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
+ "mutex");
+ return false;
+ }
+ return true;
}
/* Part of the libgomp plugin interface. Unload BRIG module described by
- struct brig_image_desc in TARGET_DATA from agent number N. */
+ struct brig_image_desc in TARGET_DATA from agent number N. Return
+ TRUE on success. */
-void
+bool
GOMP_OFFLOAD_unload_image (int n, unsigned version, void *target_data)
{
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
- GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
- " (expected %u, received %u)",
- GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+ {
+ GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
+ " (expected %u, received %u)",
+ GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+ return false;
+ }
struct agent_info *agent;
agent = get_agent_info (n);
- if (pthread_rwlock_wrlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
+ if (!agent)
+ return false;
+ if (pthread_rwlock_wrlock (&agent->modules_rwlock))
+ {
+ GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
+ return false;
+ }
struct module_info *module = agent->first_module;
while (module)
{
module = module->next;
}
if (!module)
- GOMP_PLUGIN_fatal ("Attempt to unload an image that has never been "
- "loaded before");
+ {
+ GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
+ "loaded before");
+ return false;
+ }
remove_module_from_agent (agent, module);
- destroy_module (module);
+ if (!destroy_module (module))
+ return false;
free (module);
- destroy_hsa_program (agent);
+ if (!destroy_hsa_program (agent))
+ return false;
if (pthread_rwlock_unlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+ {
+ GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
+ return false;
+ }
+ return true;
}
/* Part of the libgomp plugin interface. Deinitialize all information and
status associated with agent number N. We do not attempt any
synchronization, assuming the user and libgomp will not attempt
deinitialization of a device that is in any way being used at the same
- time. */
+ time. Return TRUE on success. */
-void
+bool
GOMP_OFFLOAD_fini_device (int n)
{
struct agent_info *agent = get_agent_info (n);
+ if (!agent)
+ return false;
+
if (!agent->initialized)
- return;
+ return true;
struct module_info *next_module = agent->first_module;
while (next_module)
{
struct module_info *module = next_module;
next_module = module->next;
- destroy_module (module);
+ if (!destroy_module (module))
+ return false;
free (module);
}
agent->first_module = NULL;
- destroy_hsa_program (agent);
+ if (!destroy_hsa_program (agent))
+ return false;
release_agent_shared_libraries (agent);
hsa_status_t status = hsa_queue_destroy (agent->command_q);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error destroying command queue", status);
+ return hsa_error ("Error destroying command queue", status);
status = hsa_queue_destroy (agent->kernel_dispatch_command_q);
if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error destroying kernel dispatch command queue", status);
+ return hsa_error ("Error destroying kernel dispatch command queue", status);
if (pthread_mutex_destroy (&agent->prog_mutex))
- GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent program mutex");
+ {
+ GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
+ return false;
+ }
if (pthread_rwlock_destroy (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent rwlock");
+ {
+ GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
+ return false;
+ }
agent->initialized = false;
+ return true;
}
/* Part of the libgomp plugin interface. Not implemented as it is not required
void *
GOMP_OFFLOAD_alloc (int ord, size_t size)
{
- GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_alloc is not implemented because "
+ GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
"it should never be called");
+ return NULL;
}
/* Part of the libgomp plugin interface. Not implemented as it is not required
for HSA. */
-void
+bool
GOMP_OFFLOAD_free (int ord, void *ptr)
{
- GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_free is not implemented because "
+ GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
"it should never be called");
+ return false;
}
/* Part of the libgomp plugin interface. Not implemented as it is not required
for HSA. */
-void *
+bool
GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
{
- GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
+ GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
"it should never be called");
+ return false;
}
/* Part of the libgomp plugin interface. Not implemented as it is not required
for HSA. */
-void *
+bool
GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
{
- GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
+ GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
"it should never be called");
+ return false;
}
/* Part of the libgomp plugin interface. Not implemented as it is not required
for HSA. */
-void *
+bool
GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
{
- GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
+ GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
"it should never be called");
+ return false;
}
return desc;
}
+/* Convenience macros for the frequently used CUDA library call and
+ error-handling sequence. They do not capture every case in this
+ file, but cover the common ones. */
+
+#define CUDA_CALL_ERET(ERET, FN, ...) \
+ do { \
+ unsigned __r = FN (__VA_ARGS__); \
+ if (__r != CUDA_SUCCESS) \
+ { \
+ GOMP_PLUGIN_error (#FN " error: %s", \
+ cuda_error (__r)); \
+ return ERET; \
+ } \
+ } while (0)
+
+#define CUDA_CALL(FN, ...) \
+ CUDA_CALL_ERET (false, (FN), __VA_ARGS__)
+
+#define CUDA_CALL_ASSERT(FN, ...) \
+ do { \
+ unsigned __r = FN (__VA_ARGS__); \
+ if (__r != CUDA_SUCCESS) \
+ { \
+ GOMP_PLUGIN_fatal (#FN " error: %s", \
+ cuda_error (__r)); \
+ } \
+ } while (0)
+
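To illustrate how the three macros divide the work (the function below is a
hypothetical sketch, not part of the patch): CUDA_CALL is for bool-returning
plugin code, CUDA_CALL_ERET supplies the error return value for other return
types (NULL, -1), and CUDA_CALL_ASSERT keeps the old aborting behavior where
no error return is possible.

static bool
example_zero_device_buffer (CUdeviceptr d, size_t size)
{
  /* On failure this expands to GOMP_PLUGIN_error (...) plus 'return false'.  */
  CUDA_CALL (cuMemsetD8, d, 0, size);
  return true;
}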
static unsigned int instantiated_devices = 0;
static pthread_mutex_t ptx_dev_lock = PTHREAD_MUTEX_INITIALIZER;
char mappings[0];
};
-static void
+static bool
map_init (struct ptx_stream *s)
{
- CUresult r;
-
int size = getpagesize ();
assert (s);
assert (!s->d);
assert (!s->h);
- r = cuMemAllocHost (&s->h, size);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemAllocHost error: %s", cuda_error (r));
-
- r = cuMemHostGetDevicePointer (&s->d, s->h, 0);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemHostGetDevicePointer error: %s", cuda_error (r));
+ CUDA_CALL (cuMemAllocHost, &s->h, size);
+ CUDA_CALL (cuMemHostGetDevicePointer, &s->d, s->h, 0);
assert (s->h);
assert (s->h_next);
assert (s->h_end);
+ return true;
}
-static void
+static bool
map_fini (struct ptx_stream *s)
{
- CUresult r;
-
- r = cuMemFreeHost (s->h);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_error ("cuMemFreeHost error: %s", cuda_error (r));
+ CUDA_CALL (cuMemFreeHost, s->h);
+ return true;
}
static void
return (struct nvptx_thread *) GOMP_PLUGIN_acc_thread ();
}
-static void
+static bool
init_streams_for_device (struct ptx_device *ptx_dev, int concurrency)
{
int i;
null_stream->multithreaded = true;
null_stream->d = (CUdeviceptr) NULL;
null_stream->h = NULL;
- map_init (null_stream);
- ptx_dev->null_stream = null_stream;
+ if (!map_init (null_stream))
+ return false;
+ ptx_dev->null_stream = null_stream;
ptx_dev->active_streams = NULL;
pthread_mutex_init (&ptx_dev->stream_lock, NULL);
for (i = 0; i < concurrency; i++)
ptx_dev->async_streams.arr[i] = NULL;
+
+ return true;
}
-static void
+static bool
fini_streams_for_device (struct ptx_device *ptx_dev)
{
free (ptx_dev->async_streams.arr);
+ bool ret = true;
while (ptx_dev->active_streams != NULL)
{
struct ptx_stream *s = ptx_dev->active_streams;
ptx_dev->active_streams = ptx_dev->active_streams->next;
- map_fini (s);
- cuStreamDestroy (s->stream);
+ ret &= map_fini (s);
+
+ CUresult r = cuStreamDestroy (s->stream);
+ if (r != CUDA_SUCCESS)
+ {
+ GOMP_PLUGIN_error ("cuStreamDestroy error: %s", cuda_error (r));
+ ret = false;
+ }
free (s);
}
- map_fini (ptx_dev->null_stream);
+ ret &= map_fini (ptx_dev->null_stream);
free (ptx_dev->null_stream);
+ return ret;
}
/* Select a stream for (OpenACC-semantics) ASYNC argument for the current
{
r = cuStreamCreate (&s->stream, CU_STREAM_DEFAULT);
if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuStreamCreate error: %s", cuda_error (r));
+ {
+ pthread_mutex_unlock (&ptx_dev->stream_lock);
+ GOMP_PLUGIN_fatal ("cuStreamCreate error: %s",
+ cuda_error (r));
+ }
}
/* If CREATE is true, we're going to be queueing some work on this
s->d = (CUdeviceptr) NULL;
s->h = NULL;
- map_init (s);
+ if (!map_init (s))
+ {
+ pthread_mutex_unlock (&ptx_dev->stream_lock);
+ GOMP_PLUGIN_fatal ("map_init fail");
+ }
s->next = ptx_dev->active_streams;
ptx_dev->active_streams = s;
stream = ptx_dev->async_streams.arr[async];
}
else if (async < 0)
- GOMP_PLUGIN_fatal ("bad async %d", async);
+ {
+ if (create)
+ pthread_mutex_unlock (&ptx_dev->stream_lock);
+ GOMP_PLUGIN_fatal ("bad async %d", async);
+ }
if (create)
{
static bool
nvptx_init (void)
{
- CUresult r;
int ndevs;
if (instantiated_devices != 0)
return true;
- r = cuInit (0);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuInit error: %s", cuda_error (r));
-
+ CUDA_CALL (cuInit, 0);
ptx_events = NULL;
-
pthread_mutex_init (&ptx_event_lock, NULL);
- r = cuDeviceGetCount (&ndevs);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuDeviceGetCount error: %s", cuda_error (r));
-
+ CUDA_CALL (cuDeviceGetCount, &ndevs);
ptx_devices = GOMP_PLUGIN_malloc_cleared (sizeof (struct ptx_device *)
* ndevs);
-
return true;
}
/* Select the N'th PTX device for the current host thread. The device must
have been previously opened before calling this function. */
-static void
+static bool
nvptx_attach_host_thread_to_device (int n)
{
CUdevice dev;
r = cuCtxGetDevice (&dev);
if (r != CUDA_SUCCESS && r != CUDA_ERROR_INVALID_CONTEXT)
- GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (r));
+ {
+ GOMP_PLUGIN_error ("cuCtxGetDevice error: %s", cuda_error (r));
+ return false;
+ }
if (r != CUDA_ERROR_INVALID_CONTEXT && dev == n)
- return;
+ return true;
else
{
CUcontext old_ctx;
ptx_dev = ptx_devices[n];
- assert (ptx_dev);
+ if (!ptx_dev)
+ {
+ GOMP_PLUGIN_error ("device %d not found", n);
+ return false;
+ }
- r = cuCtxGetCurrent (&thd_ctx);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuCtxGetCurrent error: %s", cuda_error (r));
+ CUDA_CALL (cuCtxGetCurrent, &thd_ctx);
/* We don't necessarily have a current context (e.g. if it has been
destroyed).  Pop it if we do though. */
if (thd_ctx != NULL)
- {
- r = cuCtxPopCurrent (&old_ctx);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuCtxPopCurrent error: %s", cuda_error (r));
- }
+ CUDA_CALL (cuCtxPopCurrent, &old_ctx);
- r = cuCtxPushCurrent (ptx_dev->ctx);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuCtxPushCurrent error: %s", cuda_error (r));
+ CUDA_CALL (cuCtxPushCurrent, ptx_dev->ctx);
}
+ return true;
}
static struct ptx_device *
CUresult r;
int async_engines, pi;
- r = cuDeviceGet (&dev, n);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuDeviceGet error: %s", cuda_error (r));
+ CUDA_CALL_ERET (NULL, cuDeviceGet, &dev, n);
ptx_dev = GOMP_PLUGIN_malloc (sizeof (struct ptx_device));
r = cuCtxGetDevice (&ctx_dev);
if (r != CUDA_SUCCESS && r != CUDA_ERROR_INVALID_CONTEXT)
- GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (r));
+ {
+ GOMP_PLUGIN_error ("cuCtxGetDevice error: %s", cuda_error (r));
+ return NULL;
+ }
if (r != CUDA_ERROR_INVALID_CONTEXT && ctx_dev != dev)
{
/* The current host thread has an active context for a different device.
Detach it. */
CUcontext old_ctx;
-
- r = cuCtxPopCurrent (&old_ctx);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuCtxPopCurrent error: %s", cuda_error (r));
+ CUDA_CALL_ERET (NULL, cuCtxPopCurrent, &old_ctx);
}
- r = cuCtxGetCurrent (&ptx_dev->ctx);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuCtxGetCurrent error: %s", cuda_error (r));
+ CUDA_CALL_ERET (NULL, cuCtxGetCurrent, &ptx_dev->ctx);
if (!ptx_dev->ctx)
- {
- r = cuCtxCreate (&ptx_dev->ctx, CU_CTX_SCHED_AUTO, dev);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuCtxCreate error: %s", cuda_error (r));
- }
+ CUDA_CALL_ERET (NULL, cuCtxCreate, &ptx_dev->ctx, CU_CTX_SCHED_AUTO, dev);
else
ptx_dev->ctx_shared = true;
- r = cuDeviceGetAttribute (&pi, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, dev);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (r));
-
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute,
+ &pi, CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, dev);
ptx_dev->overlap = pi;
- r = cuDeviceGetAttribute (&pi, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (r));
-
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute,
+ &pi, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev);
ptx_dev->map = pi;
- r = cuDeviceGetAttribute (&pi, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (r));
-
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute,
+ &pi, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev);
ptx_dev->concur = pi;
- r = cuDeviceGetAttribute (&pi, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (r));
-
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute,
+ &pi, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
ptx_dev->mode = pi;
- r = cuDeviceGetAttribute (&pi, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuDeviceGetAttribute error: %s", cuda_error (r));
-
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute,
+ &pi, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev);
ptx_dev->mkern = pi;
r = cuDeviceGetAttribute (&async_engines,
ptx_dev->images = NULL;
pthread_mutex_init (&ptx_dev->image_lock, NULL);
- init_streams_for_device (ptx_dev, async_engines);
+ if (!init_streams_for_device (ptx_dev, async_engines))
+ return NULL;
return ptx_dev;
}
-static void
+static bool
nvptx_close_device (struct ptx_device *ptx_dev)
{
- CUresult r;
-
if (!ptx_dev)
- return;
+ return true;
- fini_streams_for_device (ptx_dev);
+ if (!fini_streams_for_device (ptx_dev))
+ return false;
pthread_mutex_destroy (&ptx_dev->image_lock);
if (!ptx_dev->ctx_shared)
- {
- r = cuCtxDestroy (ptx_dev->ctx);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuCtxDestroy error: %s", cuda_error (r));
- }
+ CUDA_CALL (cuCtxDestroy, ptx_dev->ctx);
free (ptx_dev);
+ return true;
}
static int
nvptx_get_num_devices (void)
{
int n;
- CUresult r;
/* PR libgomp/65099: Currently, we only support offloading in 64-bit
configurations. */
further initialization). */
if (instantiated_devices == 0)
{
- r = cuInit (0);
+ CUresult r = cuInit (0);
/* This is not an error: e.g. we may have CUDA libraries installed but
no devices available. */
if (r != CUDA_SUCCESS)
return 0;
}
- r = cuDeviceGetCount (&n);
- if (r!= CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuDeviceGetCount error: %s", cuda_error (r));
-
+ CUDA_CALL_ERET (-1, cuDeviceGetCount, &n);
return n;
}
-static void
+static bool
link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
unsigned num_objs)
{
opts[5] = CU_JIT_LOG_VERBOSE;
optvals[5] = (void *) 1;
- r = cuLinkCreate (6, opts, optvals, &linkstate);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));
+ CUDA_CALL (cuLinkCreate, 6, opts, optvals, &linkstate);
for (; num_objs--; ptx_objs++)
{
if (r != CUDA_SUCCESS)
{
GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
- GOMP_PLUGIN_fatal ("cuLinkAddData (ptx_code) error: %s",
+ GOMP_PLUGIN_error ("cuLinkAddData (ptx_code) error: %s",
cuda_error (r));
+ return false;
}
}
GOMP_PLUGIN_debug (0, "Link log %s\n", &ilog[0]);
if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuLinkComplete error: %s", cuda_error (r));
-
- r = cuModuleLoadData (module, linkout);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuModuleLoadData error: %s", cuda_error (r));
+ {
+ GOMP_PLUGIN_error ("cuLinkComplete error: %s", cuda_error (r));
+ return false;
+ }
- r = cuLinkDestroy (linkstate);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuLinkDestory error: %s", cuda_error (r));
+ CUDA_CALL (cuModuleLoadData, module, linkout);
+ CUDA_CALL (cuLinkDestroy, linkstate);
+ return true;
}
static void
/* Copy the (device) pointers to arguments to the device (dp and hp might in
fact have the same value on a unified-memory system). */
- r = cuMemcpy ((CUdeviceptr)dp, (CUdeviceptr)hp, mapnum * sizeof (void *));
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemcpy failed: %s", cuda_error (r));
-
+ CUDA_CALL_ASSERT (cuMemcpy, (CUdeviceptr) dp, (CUdeviceptr) hp,
+ mapnum * sizeof (void *));
GOMP_PLUGIN_debug (0, " %s: kernel %s: launch"
" gangs=%u, workers=%u, vectors=%u\n",
__FUNCTION__, targ_fn->launch->fn,
// vector length ntid.x
kargs[0] = &dp;
- r = cuLaunchKernel (function,
- dims[GOMP_DIM_GANG], 1, 1,
- dims[GOMP_DIM_VECTOR], dims[GOMP_DIM_WORKER], 1,
- 0, dev_str->stream, kargs, 0);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuLaunchKernel, function,
+ dims[GOMP_DIM_GANG], 1, 1,
+ dims[GOMP_DIM_VECTOR], dims[GOMP_DIM_WORKER], 1,
+ 0, dev_str->stream, kargs, 0);
#ifndef DISABLE_ASYNC
if (async < acc_async_noval)
event_gc (true);
- r = cuEventRecord (*e, dev_str->stream);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuEventRecord, *e, dev_str->stream);
event_add (PTX_EVT_KNL, e, (void *)dev_str);
}
nvptx_alloc (size_t s)
{
CUdeviceptr d;
- CUresult r;
- r = cuMemAlloc (&d, s);
- if (r == CUDA_ERROR_OUT_OF_MEMORY)
- return 0;
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemAlloc error: %s", cuda_error (r));
- return (void *)d;
+ CUDA_CALL_ERET (NULL, cuMemAlloc, &d, s);
+ return (void *) d;
}
-static void
+static bool
nvptx_free (void *p)
{
- CUresult r;
CUdeviceptr pb;
size_t ps;
- r = cuMemGetAddressRange (&pb, &ps, (CUdeviceptr)p);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemGetAddressRange error: %s", cuda_error (r));
-
- if ((CUdeviceptr)p != pb)
- GOMP_PLUGIN_fatal ("invalid device address");
+ CUDA_CALL (cuMemGetAddressRange, &pb, &ps, (CUdeviceptr) p);
+ if ((CUdeviceptr) p != pb)
+ {
+ GOMP_PLUGIN_error ("invalid device address");
+ return false;
+ }
- r = cuMemFree ((CUdeviceptr)p);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemFree error: %s", cuda_error (r));
+ CUDA_CALL (cuMemFree, (CUdeviceptr) p);
+ return true;
}
-static void *
+
+static bool
nvptx_host2dev (void *d, const void *h, size_t s)
{
- CUresult r;
CUdeviceptr pb;
size_t ps;
struct nvptx_thread *nvthd = nvptx_thread ();
if (!s)
- return 0;
-
+ return true;
if (!d)
- GOMP_PLUGIN_fatal ("invalid device address");
+ {
+ GOMP_PLUGIN_error ("invalid device address");
+ return false;
+ }
- r = cuMemGetAddressRange (&pb, &ps, (CUdeviceptr)d);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemGetAddressRange error: %s", cuda_error (r));
+ CUDA_CALL (cuMemGetAddressRange, &pb, &ps, (CUdeviceptr) d);
if (!pb)
- GOMP_PLUGIN_fatal ("invalid device address");
-
+ {
+ GOMP_PLUGIN_error ("invalid device address");
+ return false;
+ }
if (!h)
- GOMP_PLUGIN_fatal ("invalid host address");
-
+ {
+ GOMP_PLUGIN_error ("invalid host address");
+ return false;
+ }
if (d == h)
- GOMP_PLUGIN_fatal ("invalid host or device address");
-
+ {
+ GOMP_PLUGIN_error ("invalid host or device address");
+ return false;
+ }
if ((void *)(d + s) > (void *)(pb + ps))
- GOMP_PLUGIN_fatal ("invalid size");
+ {
+ GOMP_PLUGIN_error ("invalid size");
+ return false;
+ }
#ifndef DISABLE_ASYNC
if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
{
- CUevent *e;
-
- e = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent));
-
- r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (r));
-
+ CUevent *e = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent));
+ CUDA_CALL (cuEventCreate, e, CU_EVENT_DISABLE_TIMING);
event_gc (false);
-
- r = cuMemcpyHtoDAsync ((CUdeviceptr)d, h, s,
- nvthd->current_stream->stream);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemcpyHtoDAsync error: %s", cuda_error (r));
-
- r = cuEventRecord (*e, nvthd->current_stream->stream);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r));
-
+ CUDA_CALL (cuMemcpyHtoDAsync,
+ (CUdeviceptr) d, h, s, nvthd->current_stream->stream);
+ CUDA_CALL (cuEventRecord, *e, nvthd->current_stream->stream);
event_add (PTX_EVT_MEM, e, (void *)h);
}
else
#endif
- {
- r = cuMemcpyHtoD ((CUdeviceptr)d, h, s);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemcpyHtoD error: %s", cuda_error (r));
- }
+ CUDA_CALL (cuMemcpyHtoD, (CUdeviceptr) d, h, s);
- return 0;
+ return true;
}
-static void *
+static bool
nvptx_dev2host (void *h, const void *d, size_t s)
{
- CUresult r;
CUdeviceptr pb;
size_t ps;
struct nvptx_thread *nvthd = nvptx_thread ();
if (!s)
- return 0;
-
+ return true;
if (!d)
- GOMP_PLUGIN_fatal ("invalid device address");
+ {
+ GOMP_PLUGIN_error ("invalid device address");
+ return false;
+ }
- r = cuMemGetAddressRange (&pb, &ps, (CUdeviceptr)d);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemGetAddressRange error: %s", cuda_error (r));
+ CUDA_CALL (cuMemGetAddressRange, &pb, &ps, (CUdeviceptr) d);
if (!pb)
- GOMP_PLUGIN_fatal ("invalid device address");
-
+ {
+ GOMP_PLUGIN_error ("invalid device address");
+ return false;
+ }
if (!h)
- GOMP_PLUGIN_fatal ("invalid host address");
-
+ {
+ GOMP_PLUGIN_error ("invalid host address");
+ return false;
+ }
if (d == h)
- GOMP_PLUGIN_fatal ("invalid host or device address");
-
+ {
+ GOMP_PLUGIN_error ("invalid host or device address");
+ return false;
+ }
if ((void *)(d + s) > (void *)(pb + ps))
- GOMP_PLUGIN_fatal ("invalid size");
+ {
+ GOMP_PLUGIN_error ("invalid size");
+ return false;
+ }
#ifndef DISABLE_ASYNC
if (nvthd->current_stream != nvthd->ptx_dev->null_stream)
{
- CUevent *e;
-
- e = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent));
-
- r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventCreate error: %s\n", cuda_error (r));
-
+ CUevent *e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
+ CUDA_CALL (cuEventCreate, e, CU_EVENT_DISABLE_TIMING);
event_gc (false);
-
- r = cuMemcpyDtoHAsync (h, (CUdeviceptr)d, s,
- nvthd->current_stream->stream);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemcpyDtoHAsync error: %s", cuda_error (r));
-
- r = cuEventRecord (*e, nvthd->current_stream->stream);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r));
-
+ CUDA_CALL (cuMemcpyDtoHAsync,
+ h, (CUdeviceptr) d, s, nvthd->current_stream->stream);
+ CUDA_CALL (cuEventRecord, *e, nvthd->current_stream->stream);
event_add (PTX_EVT_MEM, e, (void *)h);
}
else
#endif
- {
- r = cuMemcpyDtoH (h, (CUdeviceptr)d, s);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuMemcpyDtoH error: %s", cuda_error (r));
- }
+ CUDA_CALL (cuMemcpyDtoH, h, (CUdeviceptr) d, s);
- return 0;
+ return true;
}
static void
static void
nvptx_wait (int async)
{
- CUresult r;
struct ptx_stream *s;
s = select_stream_for_async (async, pthread_self (), false, NULL);
-
if (!s)
GOMP_PLUGIN_fatal ("unknown async %d", async);
- r = cuStreamSynchronize (s->stream);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuStreamSynchronize, s->stream);
event_gc (true);
}
static void
nvptx_wait_async (int async1, int async2)
{
- CUresult r;
CUevent *e;
struct ptx_stream *s1, *s2;
pthread_t self = pthread_self ();
if (s1 == s2)
GOMP_PLUGIN_fatal ("identical parameters");
- e = (CUevent *)GOMP_PLUGIN_malloc (sizeof (CUevent));
+ e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
- r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuEventCreate, e, CU_EVENT_DISABLE_TIMING);
event_gc (true);
- r = cuEventRecord (*e, s1->stream);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuEventRecord, *e, s1->stream);
event_add (PTX_EVT_SYNC, e, NULL);
- r = cuStreamWaitEvent (s2->stream, *e, 0);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuStreamWaitEvent error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuStreamWaitEvent, s2->stream, *e, 0);
}
static void
else if (r != CUDA_ERROR_NOT_READY)
GOMP_PLUGIN_fatal ("cuStreamQuery error: %s", cuda_error (r));
- r = cuStreamSynchronize (s->stream);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuStreamSynchronize, s->stream);
}
}
static void
nvptx_wait_all_async (int async)
{
- CUresult r;
struct ptx_stream *waiting_stream, *other_stream;
CUevent *e;
struct nvptx_thread *nvthd = nvptx_thread ();
e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
- r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuEventCreate, e, CU_EVENT_DISABLE_TIMING);
/* Record an event on the waited-for stream. */
- r = cuEventRecord (*e, other_stream->stream);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuEventRecord, *e, other_stream->stream);
event_add (PTX_EVT_SYNC, e, NULL);
- r = cuStreamWaitEvent (waiting_stream->stream, *e, 0);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuStreamWaitEvent error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuStreamWaitEvent, waiting_stream->stream, *e, 0);
}
pthread_mutex_unlock (&nvthd->ptx_dev->stream_lock);
pthread_t self = pthread_self ();
struct nvptx_thread *nvthd = nvptx_thread ();
- pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
-
if (async < 0)
GOMP_PLUGIN_fatal ("bad async %d", async);
+ pthread_mutex_lock (&nvthd->ptx_dev->stream_lock);
+
/* We have a list of active streams and an array mapping async values to
entries of that list. We need to take "ownership" of the passed-in stream,
and add it to our list, removing the previous entry also (if there was one)
s->next = s->next->next;
}
- cuStreamDestroy (oldstream->stream);
- map_fini (oldstream);
+ CUDA_CALL_ASSERT (cuStreamDestroy, oldstream->stream);
+
+ if (!map_fini (oldstream))
+ GOMP_PLUGIN_fatal ("error when freeing host memory");
+
free (oldstream);
}
return nvptx_get_num_devices ();
}
-void
+bool
GOMP_OFFLOAD_init_device (int n)
{
+ struct ptx_device *dev;
+
pthread_mutex_lock (&ptx_dev_lock);
if (!nvptx_init () || ptx_devices[n] != NULL)
{
pthread_mutex_unlock (&ptx_dev_lock);
- return;
+ return false;
}
- ptx_devices[n] = nvptx_open_device (n);
- instantiated_devices++;
+ dev = nvptx_open_device (n);
+ if (dev)
+ {
+ ptx_devices[n] = dev;
+ instantiated_devices++;
+ }
pthread_mutex_unlock (&ptx_dev_lock);
+
+ return dev != NULL;
}
-void
+bool
GOMP_OFFLOAD_fini_device (int n)
{
pthread_mutex_lock (&ptx_dev_lock);
if (ptx_devices[n] != NULL)
{
- nvptx_attach_host_thread_to_device (n);
- nvptx_close_device (ptx_devices[n]);
+ if (!nvptx_attach_host_thread_to_device (n)
+ || !nvptx_close_device (ptx_devices[n]))
+ {
+ pthread_mutex_unlock (&ptx_dev_lock);
+ return false;
+ }
ptx_devices[n] = NULL;
instantiated_devices--;
}
pthread_mutex_unlock (&ptx_dev_lock);
+ return true;
}
/* Return the libgomp version number we're compatible with. There is
const char *const *var_names;
const struct targ_fn_launch *fn_descs;
unsigned int fn_entries, var_entries, i, j;
- CUresult r;
struct targ_fn_descriptor *targ_fns;
struct addr_pair *targ_tbl;
const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
struct ptx_device *dev;
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_NVIDIA_PTX)
- GOMP_PLUGIN_fatal ("Offload data incompatible with PTX plugin"
- " (expected %u, received %u)",
- GOMP_VERSION_NVIDIA_PTX, GOMP_VERSION_DEV (version));
-
- GOMP_OFFLOAD_init_device (ord);
+ {
+ GOMP_PLUGIN_error ("Offload data incompatible with PTX plugin"
+ " (expected %u, received %u)",
+ GOMP_VERSION_NVIDIA_PTX, GOMP_VERSION_DEV (version));
+ return -1;
+ }
- dev = ptx_devices[ord];
-
- nvptx_attach_host_thread_to_device (ord);
+ if (!nvptx_attach_host_thread_to_device (ord)
+ || !link_ptx (&module, img_header->ptx_objs, img_header->ptx_num))
+ return -1;
- link_ptx (&module, img_header->ptx_objs, img_header->ptx_num);
+ dev = ptx_devices[ord];
/* The mkoffload utility emits a struct of pointers/integers at the
start of each offload image. The array of kernel names and the
{
CUfunction function;
- r = cuModuleGetFunction (&function, module, fn_descs[i].fn);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuModuleGetFunction error: %s", cuda_error (r));
+ CUDA_CALL_ERET (-1, cuModuleGetFunction, &function, module,
+ fn_descs[i].fn);
targ_fns->fn = function;
targ_fns->launch = &fn_descs[i];
CUdeviceptr var;
size_t bytes;
- r = cuModuleGetGlobal (&var, &bytes, module, var_names[j]);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuModuleGetGlobal error: %s", cuda_error (r));
+ CUDA_CALL_ERET (-1, cuModuleGetGlobal,
+ &var, &bytes, module, var_names[j]);
targ_tbl->start = (uintptr_t) var;
targ_tbl->end = targ_tbl->start + bytes;
/* Unload the program described by TARGET_DATA. DEV_DATA is the
function descriptors allocated by G_O_load_image. */
-void
+bool
GOMP_OFFLOAD_unload_image (int ord, unsigned version, const void *target_data)
{
struct ptx_image_data *image, **prev_p;
struct ptx_device *dev = ptx_devices[ord];
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_NVIDIA_PTX)
- return;
-
+ {
+ GOMP_PLUGIN_error ("Offload data incompatible with PTX plugin"
+ " (expected %u, received %u)",
+ GOMP_VERSION_NVIDIA_PTX, GOMP_VERSION_DEV (version));
+ return false;
+ }
+
+ bool ret = true;
pthread_mutex_lock (&dev->image_lock);
for (prev_p = &dev->images; (image = *prev_p) != 0; prev_p = &image->next)
if (image->target_data == target_data)
{
*prev_p = image->next;
- cuModuleUnload (image->module);
+ CUresult r = cuModuleUnload (image->module);
+ if (r != CUDA_SUCCESS)
+ {
+ GOMP_PLUGIN_error ("cuModuleUnload error: %s", cuda_error (r));
+ ret = false;
+ }
free (image->fns);
free (image);
break;
}
pthread_mutex_unlock (&dev->image_lock);
+ return ret;
}
void *
GOMP_OFFLOAD_alloc (int ord, size_t size)
{
- nvptx_attach_host_thread_to_device (ord);
+ if (!nvptx_attach_host_thread_to_device (ord))
+ return NULL;
return nvptx_alloc (size);
}
-void
+bool
GOMP_OFFLOAD_free (int ord, void *ptr)
{
- nvptx_attach_host_thread_to_device (ord);
- nvptx_free (ptr);
+ return (nvptx_attach_host_thread_to_device (ord)
+ && nvptx_free (ptr));
}
-void *
+bool
GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
{
- nvptx_attach_host_thread_to_device (ord);
- return nvptx_dev2host (dst, src, n);
+ return (nvptx_attach_host_thread_to_device (ord)
+ && nvptx_dev2host (dst, src, n));
}
-void *
+bool
GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
{
- nvptx_attach_host_thread_to_device (ord);
- return nvptx_host2dev (dst, src, n);
+ return (nvptx_attach_host_thread_to_device (ord)
+ && nvptx_host2dev (dst, src, n));
}
void (*device_run) (int n, void *fn_ptr, void *vars) = NULL;
void
GOMP_OFFLOAD_openacc_register_async_cleanup (void *targ_mem_desc)
{
- CUevent *e;
- CUresult r;
struct nvptx_thread *nvthd = nvptx_thread ();
+ CUevent *e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
- e = (CUevent *) GOMP_PLUGIN_malloc (sizeof (CUevent));
-
- r = cuEventCreate (e, CU_EVENT_DISABLE_TIMING);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventCreate error: %s", cuda_error (r));
-
- r = cuEventRecord (*e, nvthd->current_stream->stream);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuEventRecord error: %s", cuda_error (r));
-
+ CUDA_CALL_ASSERT (cuEventCreate, e, CU_EVENT_DISABLE_TIMING);
+ CUDA_CALL_ASSERT (cuEventRecord, *e, nvthd->current_stream->stream);
event_add (PTX_EVT_ASYNC_CLEANUP, e, targ_mem_desc);
}
struct ptx_device *ptx_dev;
struct nvptx_thread *nvthd
= GOMP_PLUGIN_malloc (sizeof (struct nvptx_thread));
- CUresult r;
CUcontext thd_ctx;
ptx_dev = ptx_devices[ord];
assert (ptx_dev);
- r = cuCtxGetCurrent (&thd_ctx);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuCtxGetCurrent error: %s", cuda_error (r));
+ CUDA_CALL_ASSERT (cuCtxGetCurrent, &thd_ctx);
assert (ptx_dev->ctx);
if (!thd_ctx)
- {
- r = cuCtxPushCurrent (ptx_dev->ctx);
- if (r != CUDA_SUCCESS)
- GOMP_PLUGIN_fatal ("cuCtxPushCurrent error: %s", cuda_error (r));
- }
+ CUDA_CALL_ASSERT (cuCtxPushCurrent, ptx_dev->ctx);
nvthd->current_stream = ptx_dev->null_stream;
nvthd->ptx_dev = ptx_dev;
return n;
}
+static inline void
+gomp_device_copy (struct gomp_device_descr *devicep,
+ bool (*copy_func) (int, void *, const void *, size_t),
+ const char *dst, void *dstaddr,
+ const char *src, const void *srcaddr,
+ size_t size)
+{
+ if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
+ src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
+ }
+}
+
+static void
+gomp_copy_host2dev (struct gomp_device_descr *devicep,
+ void *d, const void *h, size_t sz)
+{
+ gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
+}
+
+static void
+gomp_copy_dev2host (struct gomp_device_descr *devicep,
+ void *h, const void *d, size_t sz)
+{
+ gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
+}
+
+static void
+gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
+{
+ if (!devicep->free_func (devicep->target_id, devptr))
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("error in freeing device memory block at %p", devptr);
+ }
+}
+
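These helpers assume devicep->lock is held by the caller; on a failed plugin
copy they release the lock before gomp_fatal, so the many call sites below
need no error paths of their own. A representative use, copying a mapped
object to the device (addresses and length as at the existing call sites):

  gomp_copy_host2dev (devicep,
                      (void *) (tgt->tgt_start + k->tgt_offset),
                      (void *) k->host_start,
                      k->host_end - k->host_start);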
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
gomp_map_0len_lookup found oldn for newn.
Helper function of gomp_map_vars. */
}
if (GOMP_MAP_ALWAYS_TO_P (kind))
- devicep->host2dev_func (devicep->target_id,
- (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
- + newn->host_start - oldn->host_start),
- (void *) newn->host_start,
- newn->host_end - newn->host_start);
+ gomp_copy_host2dev (devicep,
+ (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+ + newn->host_start - oldn->host_start),
+ (void *) newn->host_start,
+ newn->host_end - newn->host_start);
+
if (oldn->refcount != REFCOUNT_INFINITY)
oldn->refcount++;
}
{
cur_node.tgt_offset = (uintptr_t) NULL;
/* FIXME: see comment about coalescing host/dev transfers below. */
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start + target_offset),
- (void *) &cur_node.tgt_offset,
- sizeof (void *));
+ gomp_copy_host2dev (devicep,
+ (void *) (tgt->tgt_start + target_offset),
+ (void *) &cur_node.tgt_offset,
+ sizeof (void *));
return;
}
/* Add bias to the pointer value. */
to initialize the pointer with. */
cur_node.tgt_offset -= bias;
/* FIXME: see comment about coalescing host/dev transfers below. */
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start + target_offset),
- (void *) &cur_node.tgt_offset,
- sizeof (void *));
+ gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
+ (void *) &cur_node.tgt_offset, sizeof (void *));
}
static void
memory. */
tgt->to_free = devicep->alloc_func (devicep->target_id,
tgt_size + tgt_align - 1);
+ if (!tgt->to_free)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("device memory allocation fail");
+ }
+
tgt->tgt_start = (uintptr_t) tgt->to_free;
tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
tgt->tgt_end = tgt->tgt_start + tgt_size;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
tgt->list[i].offset = tgt_size;
len = sizes[i];
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start + tgt_size),
- (void *) hostaddrs[i], len);
+ gomp_copy_host2dev (devicep,
+ (void *) (tgt->tgt_start + tgt_size),
+ (void *) hostaddrs[i], len);
tgt_size += len;
continue;
case GOMP_MAP_FIRSTPRIVATE_INT:
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
if (cur_node.tgt_offset)
cur_node.tgt_offset -= sizes[i];
- devicep->host2dev_func (devicep->target_id,
- (void *) (n->tgt->tgt_start
- + n->tgt_offset
- + cur_node.host_start
- - n->host_start),
- (void *) &cur_node.tgt_offset,
- sizeof (void *));
+ gomp_copy_host2dev (devicep,
+ (void *) (n->tgt->tgt_start
+ + n->tgt_offset
+ + cur_node.host_start
+ - n->host_start),
+ (void *) &cur_node.tgt_offset,
+ sizeof (void *));
cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start - n->host_start;
continue;
/* FIXME: Perhaps add some smarts, like if copying
several adjacent fields from host to target, use some
host buffer to avoid sending each var individually. */
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start
- + k->tgt_offset),
- (void *) k->host_start,
- k->host_end - k->host_start);
+ gomp_copy_host2dev (devicep,
+ (void *) (tgt->tgt_start
+ + k->tgt_offset),
+ (void *) k->host_start,
+ k->host_end - k->host_start);
break;
case GOMP_MAP_POINTER:
gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
break;
case GOMP_MAP_TO_PSET:
/* FIXME: see above FIXME comment. */
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start
- + k->tgt_offset),
- (void *) k->host_start,
- k->host_end - k->host_start);
+ gomp_copy_host2dev (devicep,
+ (void *) (tgt->tgt_start
+ + k->tgt_offset),
+ (void *) k->host_start,
+ k->host_end - k->host_start);
for (j = i + 1; j < mapnum; j++)
if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
break;
case GOMP_MAP_FORCE_DEVICEPTR:
assert (k->host_end - k->host_start == sizeof (void *));
-
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start
- + k->tgt_offset),
- (void *) k->host_start,
- sizeof (void *));
+ gomp_copy_host2dev (devicep,
+ (void *) (tgt->tgt_start
+ + k->tgt_offset),
+ (void *) k->host_start,
+ sizeof (void *));
break;
default:
gomp_mutex_unlock (&devicep->lock);
{
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
/* FIXME: see above FIXME comment. */
- devicep->host2dev_func (devicep->target_id,
- (void *) (tgt->tgt_start
- + i * sizeof (void *)),
- (void *) &cur_node.tgt_offset,
- sizeof (void *));
+ gomp_copy_host2dev (devicep,
+ (void *) (tgt->tgt_start + i * sizeof (void *)),
+ (void *) &cur_node.tgt_offset, sizeof (void *));
}
}
{
/* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
if (tgt->tgt_end)
- tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);
+ gomp_free_device_memory (tgt->device_descr, tgt->to_free);
free (tgt->array);
free (tgt);
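
gomp_free_device_memory, used here and in omp_target_free below,
presumably mirrors the copy wrapper sketched earlier: check the
now-bool free_func hook and escalate on failure.  A sketch under the
same assumptions (hypothetical name, declarations from libgomp.h):

static void
free_device_memory_sketch (struct gomp_device_descr *devicep, void *devptr)
{
  /* A false return from the plugin's free_func is treated as fatal,
     again dropping the device lock before reporting.  */
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error freeing device memory block at %p", devptr);
    }
}
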
{
splay_tree_key k = tgt->list[i].key;
if (tgt->list[i].copy_from)
- devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
- (void *) (k->tgt->tgt_start + k->tgt_offset),
- k->host_end - k->host_start);
+ gomp_copy_dev2host (devicep, (void *) k->host_start,
+ (void *) (k->tgt->tgt_start + k->tgt_offset),
+ k->host_end - k->host_start);
}
gomp_mutex_unlock (&devicep->lock);
if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
|| tgt->list[i].always_copy_from)
- devicep->dev2host_func (devicep->target_id,
- (void *) (k->host_start + tgt->list[i].offset),
- (void *) (k->tgt->tgt_start + k->tgt_offset
- + tgt->list[i].offset),
- tgt->list[i].length);
+ gomp_copy_dev2host (devicep,
+ (void *) (k->host_start + tgt->list[i].offset),
+ (void *) (k->tgt->tgt_start + k->tgt_offset
+ + tgt->list[i].offset),
+ tgt->list[i].length);
if (do_unmap)
{
splay_tree_remove (&devicep->mem_map, k);
(void *) n->host_start,
(void *) n->host_end);
}
+
+ void *hostaddr = (void *) cur_node.host_start;
+ void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
+ + cur_node.host_start - n->host_start);
+ size_t size = cur_node.host_end - cur_node.host_start;
+
if (GOMP_MAP_COPY_TO_P (kind & typemask))
- devicep->host2dev_func (devicep->target_id,
- (void *) (n->tgt->tgt_start
- + n->tgt_offset
- + cur_node.host_start
- - n->host_start),
- (void *) cur_node.host_start,
- cur_node.host_end - cur_node.host_start);
+ gomp_copy_host2dev (devicep, devaddr, hostaddr, size);
if (GOMP_MAP_COPY_FROM_P (kind & typemask))
- devicep->dev2host_func (devicep->target_id,
- (void *) cur_node.host_start,
- (void *) (n->tgt->tgt_start
- + n->tgt_offset
- + cur_node.host_start
- - n->host_start),
- cur_node.host_end - cur_node.host_start);
+ gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
}
}
gomp_mutex_unlock (&devicep->lock);
node = splay_tree_lookup (&devicep->mem_map, &k);
}
- devicep->unload_image_func (devicep->target_id, version, target_data);
+ if (!devicep->unload_image_func (devicep->target_id, version, target_data))
+ {
+ gomp_mutex_unlock (&devicep->lock);
+      gomp_fatal ("image unload failed");
+ }
/* Remove mappings from splay tree. */
int i;
gomp_init_device (struct gomp_device_descr *devicep)
{
int i;
- devicep->init_device_func (devicep->target_id);
+ if (!devicep->init_device_func (devicep->target_id))
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("device initialization failed");
+ }
/* Load to device all images registered by the moment. */
for (i = 0; i < num_offload_images; i++)
if ((kind == GOMP_MAP_FROM && k->refcount == 0)
|| kind == GOMP_MAP_ALWAYS_FROM)
- devicep->dev2host_func (devicep->target_id,
- (void *) cur_node.host_start,
- (void *) (k->tgt->tgt_start + k->tgt_offset
- + cur_node.host_start
- - k->host_start),
- cur_node.host_end - cur_node.host_start);
+ gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
+ (void *) (k->tgt->tgt_start + k->tgt_offset
+ + cur_node.host_start
+ - k->host_start),
+ cur_node.host_end - cur_node.host_start);
if (k->refcount == 0)
{
splay_tree_remove (&devicep->mem_map, k);
}
gomp_mutex_lock (&devicep->lock);
- devicep->free_func (devicep->target_id, device_ptr);
+ gomp_free_device_memory (devicep, device_ptr);
gomp_mutex_unlock (&devicep->lock);
}
size_t src_offset, int dst_device_num, int src_device_num)
{
struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
+ bool ret;
if (dst_device_num != GOMP_DEVICE_HOST_FALLBACK)
{
if (src_devicep == NULL)
{
gomp_mutex_lock (&dst_devicep->lock);
- dst_devicep->host2dev_func (dst_devicep->target_id,
- (char *) dst + dst_offset,
- (char *) src + src_offset, length);
+ ret = dst_devicep->host2dev_func (dst_devicep->target_id,
+ (char *) dst + dst_offset,
+ (char *) src + src_offset, length);
gomp_mutex_unlock (&dst_devicep->lock);
- return 0;
+ return (ret ? 0 : EINVAL);
}
if (dst_devicep == NULL)
{
gomp_mutex_lock (&src_devicep->lock);
- src_devicep->dev2host_func (src_devicep->target_id,
- (char *) dst + dst_offset,
- (char *) src + src_offset, length);
+ ret = src_devicep->dev2host_func (src_devicep->target_id,
+ (char *) dst + dst_offset,
+ (char *) src + src_offset, length);
gomp_mutex_unlock (&src_devicep->lock);
- return 0;
+ return (ret ? 0 : EINVAL);
}
if (src_devicep == dst_devicep)
{
gomp_mutex_lock (&src_devicep->lock);
- src_devicep->dev2dev_func (src_devicep->target_id,
- (char *) dst + dst_offset,
- (char *) src + src_offset, length);
+ ret = src_devicep->dev2dev_func (src_devicep->target_id,
+ (char *) dst + dst_offset,
+ (char *) src + src_offset, length);
gomp_mutex_unlock (&src_devicep->lock);
- return 0;
+ return (ret ? 0 : EINVAL);
}
return EINVAL;
}
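
Because the copy hooks now report failure, omp_target_memcpy can return
EINVAL to the caller instead of unconditionally returning 0.  A minimal
caller-side example (device choice and buffer size are arbitrary; needs
an offloading-capable compiler):

#include <omp.h>
#include <stdio.h>

int
main (void)
{
  int dev = omp_get_default_device ();
  int host = omp_get_initial_device ();
  char src[64] = "hello";
  void *d = omp_target_alloc (sizeof src, dev);
  if (d == NULL)
    return 1;
  /* A non-zero return now means the plugin's host2dev hook reported
     failure, rather than the failure being silently ignored.  */
  if (omp_target_memcpy (d, src, sizeof src, 0, 0, dev, host) != 0)
    {
      fprintf (stderr, "omp_target_memcpy failed\n");
      return 1;
    }
  omp_target_free (d, dev);
  return 0;
}
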
|| __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
return EINVAL;
if (dst_devicep == NULL && src_devicep == NULL)
- memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
+ {
+ memcpy ((char *) dst + dst_off, (char *) src + src_off, length);
+      ret = true;
+ }
else if (src_devicep == NULL)
- dst_devicep->host2dev_func (dst_devicep->target_id,
- (char *) dst + dst_off,
- (char *) src + src_off, length);
+ ret = dst_devicep->host2dev_func (dst_devicep->target_id,
+ (char *) dst + dst_off,
+ (char *) src + src_off, length);
else if (dst_devicep == NULL)
- src_devicep->dev2host_func (src_devicep->target_id,
- (char *) dst + dst_off,
- (char *) src + src_off, length);
+ ret = src_devicep->dev2host_func (src_devicep->target_id,
+ (char *) dst + dst_off,
+ (char *) src + src_off, length);
else if (src_devicep == dst_devicep)
- src_devicep->dev2dev_func (src_devicep->target_id,
- (char *) dst + dst_off,
- (char *) src + src_off, length);
+ ret = src_devicep->dev2dev_func (src_devicep->target_id,
+ (char *) dst + dst_off,
+ (char *) src + src_off, length);
else
- return EINVAL;
- return 0;
+    ret = false;
+ return ret ? 0 : EINVAL;
}
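
This worker is driven by omp_target_memcpy_rect, one recursion level per
dimension.  A host-only example exercising the memcpy path above (both
device arguments name the initial device):

#include <omp.h>
#include <stdio.h>

int
main (void)
{
  /* Copy the 2x3 block starting at src[1][2] out of a 4x5 array.  */
  double src[4][5], dst[2][3];
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 5; j++)
      src[i][j] = 10 * i + j;
  int host = omp_get_initial_device ();
  size_t volume[2] = { 2, 3 };
  size_t dst_off[2] = { 0, 0 };
  size_t src_off[2] = { 1, 2 };
  size_t dst_dims[2] = { 2, 3 };
  size_t src_dims[2] = { 4, 5 };
  if (omp_target_memcpy_rect (dst, src, sizeof (double), 2, volume,
			      dst_off, src_off, dst_dims, src_dims,
			      host, host) != 0)
    {
      fprintf (stderr, "rect copy failed\n");
      return 1;
    }
  printf ("%g\n", dst[0][0]);	/* prints 12 */
  return 0;
}
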
/* FIXME: it would be nice to have some plugin function to handle
int i;
for (i = 0; i < num_devices; i++)
{
+ bool ret = true;
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->state == GOMP_DEVICE_INITIALIZED)
{
- devicep->fini_device_func (devicep->target_id);
+ ret = devicep->fini_device_func (devicep->target_id);
devicep->state = GOMP_DEVICE_FINALIZED;
}
gomp_mutex_unlock (&devicep->lock);
+ if (!ret)
+ gomp_fatal ("device finalization failed");
}
}
+2016-05-26 Chung-Lin Tang <cltang@codesourcery.com>
+
+ * plugin/libgomp-plugin-intelmic.cpp (offload): Change return type
+ to bool, adjust return code.
+ (GOMP_OFFLOAD_init_device): Likewise.
+ (GOMP_OFFLOAD_fini_device): Likewise.
+ (get_target_table): Likewise.
+	(offload_image): Likewise.
+	(GOMP_OFFLOAD_load_image): Adjust call to offload_image, change
+ to return -1 on error.
+ (GOMP_OFFLOAD_unload_image): Change return type to bool, adjust return
+ code.
+	(GOMP_OFFLOAD_alloc): Return NULL on error.
+	(GOMP_OFFLOAD_free): Change return type to bool, adjust return
+	code.
+ (GOMP_OFFLOAD_host2dev): Likewise.
+ (GOMP_OFFLOAD_dev2host): Likewise.
+ (GOMP_OFFLOAD_dev2dev): Likewise.
+
2016-01-20 Ilya Verbin <ilya.verbin@intel.com>
* plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_run): Pass extra NULL
return num_devices;
}
-static void
+static bool
offload (const char *file, uint64_t line, int device, const char *name,
int num_vars, VarDesc *vars, const void **async_data)
{
if (ofld)
{
if (async_data == NULL)
- __offload_offload1 (ofld, name, 0, num_vars, vars, NULL, 0, NULL, NULL);
+ return __offload_offload1 (ofld, name, 0, num_vars, vars, NULL, 0,
+ NULL, NULL);
else
{
OffloadFlags flags;
flags.flags = 0;
flags.bits.omp_async = 1;
- __offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0, NULL,
- async_data, 0, NULL, flags, NULL);
+ return __offload_offload3 (ofld, name, 0, num_vars, vars, NULL, 0,
+ NULL, async_data, 0, NULL, flags, NULL);
}
}
else
{
- fprintf (stderr, "%s:%d: Offload target acquire failed\n", file, line);
- exit (1);
+      GOMP_PLUGIN_error ("%s:%d: Offload target acquire failed", file, line);
+ return false;
}
}
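
Replacing fprintf + exit with GOMP_PLUGIN_error turns a hard process
abort into a failure the plugin can hand back as false, leaving the
decision to libgomp.  The convention, sketched with a made-up hook name
(note that GOMP_PLUGIN_error appends the trailing newline itself):

#include <stdbool.h>
#include "libgomp-plugin.h"

static bool
example_hook (int device)
{
  if (device < 0)
    {
      /* Report and return; unlike GOMP_PLUGIN_fatal this comes back,
	 so the libgomp side chooses whether to escalate.  */
      GOMP_PLUGIN_error ("invalid device %d", device);
      return false;
    }
  return true;
}
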
/* liboffloadmic loads and runs offload_target_main on all available devices
during a first call to offload (). */
-extern "C" void
+extern "C" bool
GOMP_OFFLOAD_init_device (int device)
{
TRACE ("(device = %d)", device);
pthread_once (&main_image_is_registered, register_main_image);
- offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0, NULL,
- NULL);
+ return offload (__FILE__, __LINE__, device, "__offload_target_init_proc", 0,
+ NULL, NULL);
}
-extern "C" void
+extern "C" bool
GOMP_OFFLOAD_fini_device (int device)
{
TRACE ("(device = %d)", device);
/* liboffloadmic will finalize target processes on all available devices. */
__offload_unregister_image (&main_target_image);
+ return true;
}
-static void
+static bool
get_target_table (int device, int &num_funcs, int &num_vars, void **&table)
{
VarDesc vd1[2] = { vd_tgt2host, vd_tgt2host };
vd1[1].ptr = &num_vars;
vd1[1].size = sizeof (num_vars);
- offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2, vd1,
- NULL);
+ if (!offload (__FILE__, __LINE__, device, "__offload_target_table_p1", 2,
+ vd1, NULL))
+ return false;
int table_size = num_funcs + 2 * num_vars;
if (table_size > 0)
vd2.ptr = table;
vd2.size = table_size * sizeof (void *);
- offload (__FILE__, __LINE__, device, "__offload_target_table_p2", 1, &vd2,
- NULL);
+ return offload (__FILE__, __LINE__, device, "__offload_target_table_p2",
+ 1, &vd2, NULL);
}
+ return true;
}
/* Offload TARGET_IMAGE to all available devices and fill address_table with
corresponding target addresses. */
-static void
+static bool
offload_image (const void *target_image)
{
void *image_start = ((void **) target_image)[0];
+ image_size);
if (!image)
{
- fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
- exit (1);
+      GOMP_PLUGIN_error ("%s: Can't allocate memory", __FILE__);
+ return false;
}
image->size = image_size;
/* Receive tables for target_image from all devices. */
DevAddrVect dev_table;
+ bool ret = true;
for (int dev = 0; dev < num_devices; dev++)
{
int num_funcs = 0;
int num_vars = 0;
void **table = NULL;
- get_target_table (dev, num_funcs, num_vars, table);
+ ret &= get_target_table (dev, num_funcs, num_vars, table);
AddrVect curr_dev_table;
address_table->insert (std::make_pair (target_image, dev_table));
image_descriptors->insert (std::make_pair (target_image, image));
+ return ret;
}
/* Return the libgomp version number we're compatible with. There is
TRACE ("(device = %d, target_image = %p)", device, target_image);
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
- GOMP_PLUGIN_fatal ("Offload data incompatible with intelmic plugin"
- " (expected %u, received %u)",
- GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
+ {
+ GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
+ " (expected %u, received %u)",
+ GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
+ return -1;
+ }
/* If target_image is already present in address_table, then there is no need
to offload it. */
if (address_table->count (target_image) == 0)
- offload_image (target_image);
+ {
+      /* If it fails, return -1 as the error code.  */
+ if (!offload_image (target_image))
+ return -1;
+ }
AddrVect *curr_dev_table = &(*address_table)[target_image][device];
int table_size = curr_dev_table->size ();
addr_pair *table = (addr_pair *) malloc (table_size * sizeof (addr_pair));
if (table == NULL)
{
- fprintf (stderr, "%s: Can't allocate memory\n", __FILE__);
- exit (1);
+      GOMP_PLUGIN_error ("%s: Can't allocate memory", __FILE__);
+ return -1;
}
std::copy (curr_dev_table->begin (), curr_dev_table->end (), table);
return table_size;
}
-extern "C" void
+extern "C" bool
GOMP_OFFLOAD_unload_image (int device, unsigned version,
const void *target_image)
{
if (GOMP_VERSION_DEV (version) > GOMP_VERSION_INTEL_MIC)
- return;
+ {
+ GOMP_PLUGIN_error ("Offload data incompatible with intelmic plugin"
+ " (expected %u, received %u)",
+ GOMP_VERSION_INTEL_MIC, GOMP_VERSION_DEV (version));
+ return false;
+ }
TRACE ("(device = %d, target_image = %p)", device, target_image);
address_table->erase (target_image);
image_descriptors->erase (target_image);
}
+ return true;
}
extern "C" void *
vd[1].ptr = &tgt_ptr;
vd[1].size = sizeof (void *);
- offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2, vd, NULL);
+ if (!offload (__FILE__, __LINE__, device, "__offload_target_alloc", 2,
+ vd, NULL))
+ return NULL;
return tgt_ptr;
}
-extern "C" void
+extern "C" bool
GOMP_OFFLOAD_free (int device, void *tgt_ptr)
{
TRACE ("(device = %d, tgt_ptr = %p)", device, tgt_ptr);
vd.ptr = &tgt_ptr;
vd.size = sizeof (void *);
- offload (__FILE__, __LINE__, device, "__offload_target_free", 1, &vd, NULL);
+ return offload (__FILE__, __LINE__, device, "__offload_target_free", 1,
+ &vd, NULL);
}
-extern "C" void *
+extern "C" bool
GOMP_OFFLOAD_host2dev (int device, void *tgt_ptr, const void *host_ptr,
size_t size)
{
TRACE ("(device = %d, tgt_ptr = %p, host_ptr = %p, size = %d)",
device, tgt_ptr, host_ptr, size);
if (!size)
- return tgt_ptr;
+ return true;
VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
vd1[0].ptr = &tgt_ptr;
vd1[1].ptr = &size;
vd1[1].size = sizeof (size);
- offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2, vd1,
- NULL);
+ if (!offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p1", 2,
+ vd1, NULL))
+ return false;
VarDesc vd2 = vd_host2tgt;
vd2.ptr = (void *) host_ptr;
vd2.size = size;
- offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1, &vd2,
- NULL);
-
- return tgt_ptr;
+ return offload (__FILE__, __LINE__, device, "__offload_target_host2tgt_p2", 1,
+ &vd2, NULL);
}
-extern "C" void *
+extern "C" bool
GOMP_OFFLOAD_dev2host (int device, void *host_ptr, const void *tgt_ptr,
size_t size)
{
TRACE ("(device = %d, host_ptr = %p, tgt_ptr = %p, size = %d)",
device, host_ptr, tgt_ptr, size);
if (!size)
- return host_ptr;
+ return true;
VarDesc vd1[2] = { vd_host2tgt, vd_host2tgt };
vd1[0].ptr = &tgt_ptr;
vd1[1].ptr = &size;
vd1[1].size = sizeof (size);
- offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2, vd1,
- NULL);
+ if (!offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p1", 2,
+ vd1, NULL))
+ return false;
VarDesc vd2 = vd_tgt2host;
vd2.ptr = (void *) host_ptr;
vd2.size = size;
- offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1, &vd2,
- NULL);
-
- return host_ptr;
+ return offload (__FILE__, __LINE__, device, "__offload_target_tgt2host_p2", 1,
+ &vd2, NULL);
}
-extern "C" void *
+extern "C" bool
GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
size_t size)
{
TRACE ("(device = %d, dst_ptr = %p, src_ptr = %p, size = %d)",
device, dst_ptr, src_ptr, size);
if (!size)
- return dst_ptr;
+ return true;
VarDesc vd[3] = { vd_host2tgt, vd_host2tgt, vd_host2tgt };
vd[0].ptr = &dst_ptr;
vd[2].ptr = &size;
vd[2].size = sizeof (size);
- offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3, vd, NULL);
-
- return dst_ptr;
+ return offload (__FILE__, __LINE__, device, "__offload_target_tgt2tgt", 3,
+ vd, NULL);
}
extern "C" void