This function returns what device type will be used when executing a
parallel or kernels region.
+This function returns @code{acc_device_none} if
+@code{acc_get_device_type} is called from an
+@code{acc_ev_device_init_start} or @code{acc_ev_device_init_end}
+callback of the OpenACC Profiling Interface (@ref{OpenACC Profiling
+Interface}), that is, if the device is currently being initialized.
+
@item @emph{C/C++}:
@multitable @columnfractions .20 .80
@item @emph{Prototype}: @tab @code{acc_device_t acc_get_device_type(void);}
We're not yet accounting for the fact that @cite{OpenACC events may
occur during event processing}.
+We just handle one case specially, as required by CUDA 9.0
+@command{nvprof}, that @code{acc_get_device_type}
+(@ref{acc_get_device_type}) may be called from
+@code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
+callbacks.
We're not yet implementing initialization via a
@code{acc_register_library} function that is either statically linked
static gomp_mutex_t acc_device_lock;
+static gomp_mutex_t acc_init_state_lock; /* Taken around the accesses to acc_init_state and acc_init_thread.  */
+static enum { uninitialized, initializing, initialized } acc_init_state
+ = uninitialized; /* acc_init_1 sets this to initializing on entry and to initialized after its profiling dispatch.  */
+static pthread_t acc_init_thread; /* pthread_self () of the thread that most recently entered acc_init_1.  */
+
/* A cached version of the dispatcher for the global "current" accelerator type,
e.g. used as the default when creating new host threads. This is the
device-type equivalent of goacc_device_num (which specifies which device to
static struct gomp_device_descr *
acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit)
{
+ gomp_mutex_lock (&acc_init_state_lock);
+ acc_init_state = initializing;
+ acc_init_thread = pthread_self ();
+ gomp_mutex_unlock (&acc_init_state_lock);
+
bool check_not_nested_p;
if (implicit)
{
&api_info);
}
+ /* We're setting 'initialized' *after* 'goacc_profiling_dispatch', so that a
+ nested 'acc_get_device_type' called from a profiling callback still sees
+ 'initializing', so that we don't deadlock when it then again tries to lock
+ 'goacc_prof_lock'. See also the discussion in 'acc_get_device_type'. */
+ gomp_mutex_lock (&acc_init_state_lock);
+ acc_init_state = initialized;
+ gomp_mutex_unlock (&acc_init_state_lock);
+
return base_dev;
}
ialias (acc_set_device_type)
+static bool /* True iff acc_init_state is initializing and acc_init_thread is the calling thread.  */
+self_initializing_p (void)
+{
+ bool res;
+ gomp_mutex_lock (&acc_init_state_lock); /* Read both variables under the lock that acc_init_1 writes them under.  */
+ res = (acc_init_state == initializing
+ && pthread_equal (acc_init_thread, pthread_self ())); /* '&&' skips the thread comparison unless initializing.  */
+ gomp_mutex_unlock (&acc_init_state_lock);
+ return res;
+}
+
acc_device_t
acc_get_device_type (void)
{
if (thr && thr->base_dev)
res = acc_device_type (thr->base_dev->type);
+ else if (self_initializing_p ())
+ /* The CUDA libaccinj64.so version 9.0+ calls acc_get_device_type during the
+ acc_ev_device_init_start event callback, which is dispatched during
+ acc_init_1. Trying to lock acc_device_lock during such a call (as we do
+ in the else clause below) will result in deadlock, since the lock has
+ already been taken by the acc_init_1 caller. We work around this problem
+ by using the acc_get_device_type property "If the device type has not yet
+ been selected, the value acc_device_none may be returned". */
+ ;
else
{
acc_prof_info prof_info;
--- /dev/null
+/* { dg-do run } */
+/* { dg-timeout 10 } */
+
+/* Test the calling of 'acc_get_device_type' from within
+ 'cb_device_init_start' and 'cb_device_init_end' callbacks. This occurs
+ when the CUDA 9.0 'nvprof' tool is used, and previously deadlocked. */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <acc_prof.h>
+
+static acc_prof_reg reg; /* Registration routine saved by acc_register_library.  */
+static acc_prof_reg unreg; /* Unregistration routine saved by acc_register_library.  */
+static acc_prof_lookup_func lookup; /* Lookup routine saved by acc_register_library.  */
+
+void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
+{ /* Store the three routines passed in into the file-scope variables of the same names.  */
+ reg = reg_;
+ unreg = unreg_;
+ lookup = lookup_;
+}
+
+static bool expect_cb_device_init_start; /* Set by main before each acc_init; cleared by cb_device_init_start.  */
+static bool expect_cb_device_init_end; /* Set by cb_device_init_start; cleared by cb_device_init_end.  */
+
+static void cb_device_init_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
+{ /* Callback that main registers for acc_ev_device_init_start; its three arguments are not used.  */
+ assert (expect_cb_device_init_start); /* Fails if the callback fires when main has not armed it.  */
+ expect_cb_device_init_start = false;
+
+ acc_device_t acc_device_type;
+ acc_device_type = acc_get_device_type (); /* The nested call this test is about.  */
+ assert (acc_device_type == acc_device_none);
+
+ expect_cb_device_init_end = true; /* From here on the end callback is expected.  */
+}
+
+static void cb_device_init_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
+{ /* Callback that main registers for acc_ev_device_init_end; its three arguments are not used.  */
+ assert (expect_cb_device_init_end); /* Only true after cb_device_init_start has run.  */
+ expect_cb_device_init_end = false;
+
+ acc_device_t acc_device_type;
+ acc_device_type = acc_get_device_type (); /* Nested call; acc_device_none is still expected here.  */
+ assert (acc_device_type == acc_device_none);
+}
+
+int main(void)
+{
+ acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup); /* Fills in reg, unreg and lookup.  */
+
+ reg (acc_ev_device_init_start, cb_device_init_start, acc_reg); /* Install both callbacks through the saved registration routine.  */
+ reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
+
+ expect_cb_device_init_start = true; /* First round: explicit acc_device_host.  */
+ expect_cb_device_init_end = false;
+ acc_init (acc_device_host);
+ assert (!expect_cb_device_init_start); /* The start callback ran and cleared its flag.  */
+ assert (!expect_cb_device_init_end); /* Together with the previous assert: the end callback ran as well.  */
+ {
+ acc_device_t acc_device_type;
+ acc_device_type = acc_get_device_type (); /* Called outside any callback, after acc_init has returned.  */
+ assert (acc_device_type == acc_device_host);
+ }
+ acc_shutdown (acc_device_host);
+
+ expect_cb_device_init_start = true; /* Second round: acc_device_default.  */
+ expect_cb_device_init_end = false;
+ acc_init (acc_device_default);
+ assert (!expect_cb_device_init_start);
+ assert (!expect_cb_device_init_end);
+ {
+ acc_device_t acc_device_type;
+ acc_device_type = acc_get_device_type ();
+ assert (acc_device_type != acc_device_none); /* Only checks that some device type is reported, not which one.  */
+ }
+ acc_shutdown (acc_device_default);
+
+ return 0;
+}