From a218658556a2961af26af9c046aae23b7f58fcc8 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Wed, 22 May 2019 22:34:09 +0200 Subject: [PATCH] clover: implement SVM functions for devices with fine grained system SVM support all of the functionality can be mapped to malloc/free if the device supports fine grained system SVM. v2: fix some API bugs found with the OpenCL CTS v3: remove validate_event_wait_list improve implementation of clSetKernelExecInfo make clEnqueueSVMFree spec compliant rename can_emulate_non_system_svm to has_system_svm and make it a member method improve validation in clEnqueueSVMMemFill handle CL_MEM_USES_SVM_POINTER in clGetMemObjectInfo v4: break long lines and other minor cosmetic adjustments Signed-off-by: Karol Herbst Reviewed-by: Francisco Jerez Part-of: --- .../state_trackers/clover/api/kernel.cpp | 36 ++++- .../state_trackers/clover/api/memory.cpp | 69 +++++++- .../state_trackers/clover/api/transfer.cpp | 153 +++++++++++++++++- .../state_trackers/clover/core/device.cpp | 5 +- .../state_trackers/clover/core/device.hpp | 5 + .../state_trackers/clover/util/pointer.hpp | 11 ++ 6 files changed, 259 insertions(+), 20 deletions(-) diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp b/src/gallium/state_trackers/clover/api/kernel.cpp index 38a5cc10454..962a2b59e16 100644 --- a/src/gallium/state_trackers/clover/api/kernel.cpp +++ b/src/gallium/state_trackers/clover/api/kernel.cpp @@ -352,7 +352,37 @@ CLOVER_API cl_int clSetKernelExecInfo(cl_kernel d_kern, cl_kernel_exec_info param_name, size_t param_value_size, - const void *param_value) { - CLOVER_NOT_SUPPORTED_UNTIL("2.0"); - return CL_INVALID_VALUE; + const void *param_value) try { + auto &kern = obj(d_kern); + const bool has_system_svm = all_of(std::mem_fn(&device::has_system_svm), + kern.program().context().devices()); + + if (!param_value) + return CL_INVALID_VALUE; + + switch (param_name) { + case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: { + if (param_value_size != 
sizeof(cl_bool)) + return CL_INVALID_VALUE; + + cl_bool val = *static_cast(param_value); + if (val == CL_TRUE && !has_system_svm) + return CL_INVALID_OPERATION; + else + return CL_SUCCESS; + } + + case CL_KERNEL_EXEC_INFO_SVM_PTRS: + if (has_system_svm) + return CL_SUCCESS; + + CLOVER_NOT_SUPPORTED_UNTIL("2.0"); + return CL_INVALID_VALUE; + + default: + return CL_INVALID_VALUE; + } + +} catch (error &e) { + return e.get(); } diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp index 6a0717680cf..107815bde72 100644 --- a/src/gallium/state_trackers/clover/api/memory.cpp +++ b/src/gallium/state_trackers/clover/api/memory.cpp @@ -29,15 +29,20 @@ using namespace clover; namespace { cl_mem_flags - validate_flags(cl_mem d_parent, cl_mem_flags d_flags) { + validate_flags(cl_mem d_parent, cl_mem_flags d_flags, bool svm) { const cl_mem_flags dev_access_flags = CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY; const cl_mem_flags host_ptr_flags = CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR; const cl_mem_flags host_access_flags = CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS; + const cl_mem_flags svm_flags = + CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS; + const cl_mem_flags valid_flags = - dev_access_flags | host_access_flags | (d_parent ? 0 : host_ptr_flags); + dev_access_flags + | (svm || d_parent ? 0 : host_ptr_flags) + | (svm ? 
svm_flags : host_access_flags); if ((d_flags & ~valid_flags) || util_bitcount(d_flags & dev_access_flags) > 1 || @@ -48,6 +53,10 @@ namespace { (d_flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) throw error(CL_INVALID_VALUE); + if ((d_flags & CL_MEM_SVM_ATOMICS) && + !(d_flags & CL_MEM_SVM_FINE_GRAIN_BUFFER)) + throw error(CL_INVALID_VALUE); + if (d_parent) { const auto &parent = obj(d_parent); const cl_mem_flags flags = (d_flags | @@ -77,7 +86,7 @@ namespace { CLOVER_API cl_mem clCreateBuffer(cl_context d_ctx, cl_mem_flags d_flags, size_t size, void *host_ptr, cl_int *r_errcode) try { - const cl_mem_flags flags = validate_flags(NULL, d_flags); + const cl_mem_flags flags = validate_flags(NULL, d_flags, false); auto &ctx = obj(d_ctx); if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR | @@ -103,7 +112,7 @@ clCreateSubBuffer(cl_mem d_mem, cl_mem_flags d_flags, cl_buffer_create_type op, const void *op_info, cl_int *r_errcode) try { auto &parent = obj(d_mem); - const cl_mem_flags flags = validate_flags(d_mem, d_flags); + const cl_mem_flags flags = validate_flags(d_mem, d_flags, false); if (op == CL_BUFFER_CREATE_TYPE_REGION) { auto reg = reinterpret_cast(op_info); @@ -163,7 +172,7 @@ clCreateImage(cl_context d_ctx, cl_mem_flags d_flags, CL_MEM_COPY_HOST_PTR))) throw error(CL_INVALID_HOST_PTR); - const cl_mem_flags flags = validate_flags(desc->buffer, d_flags); + const cl_mem_flags flags = validate_flags(desc->buffer, d_flags, false); if (!supported_formats(ctx, desc->image_type).count(*format)) throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED); @@ -249,7 +258,7 @@ clGetSupportedImageFormats(cl_context d_ctx, cl_mem_flags flags, auto &ctx = obj(d_ctx); auto formats = supported_formats(ctx, type); - validate_flags(NULL, flags); + validate_flags(NULL, flags, false); if (r_buf && !r_count) throw error(CL_INVALID_VALUE); @@ -313,6 +322,15 @@ clGetMemObjectInfo(cl_mem d_mem, cl_mem_info param, buf.as_scalar() = (sub ? 
sub->offset() : 0); break; } + case CL_MEM_USES_SVM_POINTER: { + // with system SVM all host ptrs are SVM pointers + // TODO: once we support devices with lower levels of SVM, we have to + // check the ptr in more detail + const bool system_svm = all_of(std::mem_fn(&device::has_system_svm), + mem.context().devices()); + buf.as_scalar() = mem.host_ptr() && system_svm; + break; + } default: throw error(CL_INVALID_VALUE); } @@ -431,13 +449,48 @@ CLOVER_API void * clSVMAlloc(cl_context d_ctx, cl_svm_mem_flags flags, size_t size, - unsigned int alignment) { + unsigned int alignment) try { + auto &ctx = obj(d_ctx); + validate_flags(NULL, flags, true); + + if (!size || + size > fold(minimum(), cl_ulong(ULONG_MAX), + map(std::mem_fn(&device::max_mem_alloc_size), ctx.devices()))) + return nullptr; + + if (!util_is_power_of_two_or_zero(alignment)) + return nullptr; + + if (!alignment) + alignment = 0x80; // sizeof(long16) + + bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices()); + if (can_emulate) { + // we can ignore all the flags as it's not required to honor them. 
+ void *ptr = nullptr; + if (alignment < sizeof(void*)) + alignment = sizeof(void*); + posix_memalign(&ptr, alignment, size); + return ptr; + } + CLOVER_NOT_SUPPORTED_UNTIL("2.0"); return nullptr; + +} catch (error &e) { + return nullptr; } CLOVER_API void clSVMFree(cl_context d_ctx, - void *svm_pointer) { + void *svm_pointer) try { + auto &ctx = obj(d_ctx); + bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices()); + + if (can_emulate) + return free(svm_pointer); + CLOVER_NOT_SUPPORTED_UNTIL("2.0"); + +} catch (error &e) { } diff --git a/src/gallium/state_trackers/clover/api/transfer.cpp b/src/gallium/state_trackers/clover/api/transfer.cpp index 879b8a9d762..d1f540769dd 100644 --- a/src/gallium/state_trackers/clover/api/transfer.cpp +++ b/src/gallium/state_trackers/clover/api/transfer.cpp @@ -22,6 +22,8 @@ #include +#include "util/bitscan.h" + #include "api/util.hpp" #include "core/event.hpp" #include "core/memory.hpp" @@ -769,13 +771,47 @@ CLOVER_API cl_int clEnqueueSVMFree(cl_command_queue d_q, cl_uint num_svm_pointers, void *svm_pointers[], - void (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[], void *user_data), + void (CL_CALLBACK *pfn_free_func) ( + cl_command_queue queue, cl_uint num_svm_pointers, + void *svm_pointers[], void *user_data), void *user_data, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) { - CLOVER_NOT_SUPPORTED_UNTIL("2.0"); - return CL_INVALID_VALUE; + cl_event *event) try { + if (bool(num_svm_pointers) != bool(svm_pointers)) + return CL_INVALID_VALUE; + + auto &q = obj(d_q); + bool can_emulate = q.device().has_system_svm(); + auto deps = objs(event_wait_list, num_events_in_wait_list); + + validate_common(q, deps); + + std::vector svm_pointers_cpy(svm_pointers, + svm_pointers + num_svm_pointers); + if (!pfn_free_func) { + if (!can_emulate) { + CLOVER_NOT_SUPPORTED_UNTIL("2.0"); + return CL_INVALID_VALUE; + } + pfn_free_func = 
[](cl_command_queue, cl_uint num_svm_pointers, + void *svm_pointers[], void *) { + for (void *p : range(svm_pointers, num_svm_pointers)) + free(p); + }; + } + + auto hev = create(q, CL_COMMAND_SVM_FREE, deps, + [=](clover::event &) mutable { + pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(), + user_data); + }); + + ret_object(event, hev); + return CL_SUCCESS; + +} catch (error &e) { + return e.get(); } CLOVER_API cl_int @@ -786,9 +822,38 @@ clEnqueueSVMMemcpy(cl_command_queue d_q, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) { + cl_event *event) try { + + if (dst_ptr == nullptr || src_ptr == nullptr) + return CL_INVALID_VALUE; + + if (static_cast(abs(reinterpret_cast(dst_ptr) - + reinterpret_cast(src_ptr))) < size) + return CL_MEM_COPY_OVERLAP; + + auto &q = obj(d_q); + bool can_emulate = q.device().has_system_svm(); + auto deps = objs(event_wait_list, num_events_in_wait_list); + + validate_common(q, deps); + + if (can_emulate) { + auto hev = create(q, CL_COMMAND_SVM_MEMCPY, deps, + [=](clover::event &) { + memcpy(dst_ptr, src_ptr, size); + }); + + if (blocking_copy) + hev().wait(); + ret_object(event, hev); + return CL_SUCCESS; + } + CLOVER_NOT_SUPPORTED_UNTIL("2.0"); return CL_INVALID_VALUE; + +} catch (error &e) { + return e.get(); } CLOVER_API cl_int @@ -799,9 +864,39 @@ clEnqueueSVMMemFill(cl_command_queue d_q, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) { + cl_event *event) try { + if (svm_ptr == nullptr || pattern == nullptr || + !util_is_power_of_two_nonzero(pattern_size) || + pattern_size > 128 || + !ptr_is_aligned(svm_ptr, pattern_size) || + size % pattern_size) + return CL_INVALID_VALUE; + + auto &q = obj(d_q); + bool can_emulate = q.device().has_system_svm(); + auto deps = objs(event_wait_list, num_events_in_wait_list); + + validate_common(q, deps); + + if (can_emulate) { + auto hev = create(q, CL_COMMAND_SVM_MEMFILL, deps, + 
[=](clover::event &) { + void *ptr = svm_ptr; + for (size_t s = size; s; s -= pattern_size) { + memcpy(ptr, pattern, pattern_size); + ptr = static_cast(ptr) + pattern_size; + } + }); + + ret_object(event, hev); + return CL_SUCCESS; + } + CLOVER_NOT_SUPPORTED_UNTIL("2.0"); return CL_INVALID_VALUE; + +} catch (error &e) { + return e.get(); } CLOVER_API cl_int @@ -812,9 +907,30 @@ clEnqueueSVMMap(cl_command_queue d_q, size_t size, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) { + cl_event *event) try { + + if (svm_ptr == nullptr || size == 0) + return CL_INVALID_VALUE; + + auto &q = obj(d_q); + bool can_emulate = q.device().has_system_svm(); + auto deps = objs(event_wait_list, num_events_in_wait_list); + + validate_common(q, deps); + + if (can_emulate) { + auto hev = create(q, CL_COMMAND_SVM_MAP, deps, + [](clover::event &) { }); + + ret_object(event, hev); + return CL_SUCCESS; + } + CLOVER_NOT_SUPPORTED_UNTIL("2.0"); return CL_INVALID_VALUE; + +} catch (error &e) { + return e.get(); } CLOVER_API cl_int @@ -822,9 +938,30 @@ clEnqueueSVMUnmap(cl_command_queue d_q, void *svm_ptr, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, - cl_event *event) { + cl_event *event) try { + + if (svm_ptr == nullptr) + return CL_INVALID_VALUE; + + auto &q = obj(d_q); + bool can_emulate = q.device().has_system_svm(); + auto deps = objs(event_wait_list, num_events_in_wait_list); + + validate_common(q, deps); + + if (can_emulate) { + auto hev = create(q, CL_COMMAND_SVM_UNMAP, deps, + [](clover::event &) { }); + + ret_object(event, hev); + return CL_SUCCESS; + } + CLOVER_NOT_SUPPORTED_UNTIL("2.0"); return CL_INVALID_VALUE; + +} catch (error &e) { + return e.get(); } CLOVER_API cl_int diff --git a/src/gallium/state_trackers/clover/core/device.cpp b/src/gallium/state_trackers/clover/core/device.cpp index e05dc562189..9a81ac61fae 100644 --- a/src/gallium/state_trackers/clover/core/device.cpp +++ 
b/src/gallium/state_trackers/clover/core/device.cpp @@ -239,7 +239,10 @@ device::svm_support() const { // and SVM pointer into the same kernel at the same time. if (pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) && pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM)) - return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM; + // we can emulate all lower levels if we support fine grain system + return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | + CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | + CL_DEVICE_SVM_FINE_GRAIN_BUFFER; return 0; } diff --git a/src/gallium/state_trackers/clover/core/device.hpp b/src/gallium/state_trackers/clover/core/device.hpp index dc9064bb638..597f9489b2c 100644 --- a/src/gallium/state_trackers/clover/core/device.hpp +++ b/src/gallium/state_trackers/clover/core/device.hpp @@ -95,6 +95,11 @@ namespace clover { clover::platform &platform; + inline bool + has_system_svm() const { + return svm_support() & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM; + } + private: pipe_screen *pipe; pipe_loader_device *ldev; diff --git a/src/gallium/state_trackers/clover/util/pointer.hpp b/src/gallium/state_trackers/clover/util/pointer.hpp index aa119a4b82b..7bb9951aef6 100644 --- a/src/gallium/state_trackers/clover/util/pointer.hpp +++ b/src/gallium/state_trackers/clover/util/pointer.hpp @@ -26,6 +26,17 @@ #include namespace clover { + /// + /// Some helper functions for raw pointer operations + /// + template + static bool + ptr_is_aligned(const T *ptr, uintptr_t a) noexcept { + assert(a == (a & -a)); + uintptr_t ptr_value = reinterpret_cast(ptr); + return (ptr_value & (a - 1)) == 0; + } + /// /// Base class for objects that support reference counting. /// -- 2.30.2