From 8e09a12f016e53f1edadc10db22806937d3b8894 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Wed, 8 Aug 2018 14:26:37 +0000 Subject: [PATCH] [libgomp, nvptx] Fall back to cuLinkAddData/cuLinkCreate if _v2 not found CUDA driver API functions cuLinkAddData and cuLinkCreate are available starting with version 5.5. In version 6.5, they are remapped onto _v2 versions. The dlopen interface of the libgomp nvptx plugin uses the _v2 versions, so it won't work with a CUDA driver with a driver API version lower than 6.5. This patch fixes the problem by testing for the presence of the _v2 versions, and falling back to the original versions in case of absence of the _v2 versions. Built on x86_64 with nvptx accelerator and reg-tested libgomp, both with and without --without-cuda-driver. 2018-08-08 Tom de Vries * plugin/cuda-lib.def (cuLinkAddData_v2, cuLinkCreate_v2): Declare using CUDA_ONE_CALL_MAYBE_NULL. * plugin/plugin-nvptx.c (cuLinkAddData, cuLinkCreate): Undef and declare. (cuLinkAddData_v2, cuLinkCreate_v2): Declare. (link_ptx): Fall back to cuLinkAddData/cuLinkCreate if the _v2 versions are not found. From-SVN: r263408 --- libgomp/ChangeLog | 9 +++++++++ libgomp/plugin/cuda-lib.def | 2 ++ libgomp/plugin/plugin-nvptx.c | 28 ++++++++++++++++++++++++---- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 605c84c2286..4aff6cd9a33 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,12 @@ +2018-08-08 Tom de Vries + + * plugin/cuda-lib.def (cuLinkAddData_v2, cuLinkCreate_v2): Declare using + CUDA_ONE_CALL_MAYBE_NULL. + * plugin/plugin-nvptx.c (cuLinkAddData, cuLinkCreate): Undef and declare. + (cuLinkAddData_v2, cuLinkCreate_v2): Declare. + (link_ptx): Fall back to cuLinkAddData/cuLinkCreate if the _v2 versions + are not found. + 2018-08-08 Tom de Vries * plugin/cuda-lib.def (cuGetErrorString): Use CUDA_ONE_CALL_MAYBE_NULL. 
diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def index 6365cdbfcbe..29028b504a0 100644 --- a/libgomp/plugin/cuda-lib.def +++ b/libgomp/plugin/cuda-lib.def @@ -19,8 +19,10 @@ CUDA_ONE_CALL_MAYBE_NULL (cuGetErrorString) CUDA_ONE_CALL (cuInit) CUDA_ONE_CALL (cuLaunchKernel) CUDA_ONE_CALL (cuLinkAddData) +CUDA_ONE_CALL_MAYBE_NULL (cuLinkAddData_v2) CUDA_ONE_CALL (cuLinkComplete) CUDA_ONE_CALL (cuLinkCreate) +CUDA_ONE_CALL_MAYBE_NULL (cuLinkCreate_v2) CUDA_ONE_CALL (cuLinkDestroy) CUDA_ONE_CALL (cuMemAlloc) CUDA_ONE_CALL (cuMemAllocHost) diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index b549b774003..6799a264976 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -54,6 +54,18 @@ extern CUresult cuGetErrorString (CUresult, const char **); #define CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR 82 #endif +#if CUDA_VERSION >= 6050 +#undef cuLinkCreate +#undef cuLinkAddData +CUresult cuLinkAddData (CUlinkState, CUjitInputType, void *, size_t, + const char *, unsigned, CUjit_option *, void **); +CUresult cuLinkCreate (unsigned, CUjit_option *, void **, CUlinkState *); +#else +CUresult cuLinkAddData_v2 (CUlinkState, CUjitInputType, void *, size_t, + const char *, unsigned, CUjit_option *, void **); +CUresult cuLinkCreate_v2 (unsigned, CUjit_option *, void **, CUlinkState *); +#endif + #define DO_PRAGMA(x) _Pragma (#x) #if PLUGIN_NVPTX_DYNAMIC @@ -938,16 +950,24 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs, nopts++; } - CUDA_CALL (cuLinkCreate, nopts, opts, optvals, &linkstate); + if (CUDA_CALL_EXISTS (cuLinkCreate_v2)) + CUDA_CALL (cuLinkCreate_v2, nopts, opts, optvals, &linkstate); + else + CUDA_CALL (cuLinkCreate, nopts, opts, optvals, &linkstate); for (; num_objs--; ptx_objs++) { /* cuLinkAddData's 'data' argument erroneously omits the const qualifier. 
*/ GOMP_PLUGIN_debug (0, "Loading:\n---\n%s\n---\n", ptx_objs->code); - r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX, - (char *) ptx_objs->code, ptx_objs->size, - 0, 0, 0, 0); + if (CUDA_CALL_EXISTS (cuLinkAddData_v2)) + r = CUDA_CALL_NOCHECK (cuLinkAddData_v2, linkstate, CU_JIT_INPUT_PTX, + (char *) ptx_objs->code, ptx_objs->size, + 0, 0, 0, 0); + else + r = CUDA_CALL_NOCHECK (cuLinkAddData, linkstate, CU_JIT_INPUT_PTX, + (char *) ptx_objs->code, ptx_objs->size, + 0, 0, 0, 0); if (r != CUDA_SUCCESS) { GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); -- 2.30.2