clSetKernelExecInfo(cl_kernel d_kern,
cl_kernel_exec_info param_name,
size_t param_value_size,
- const void *param_value) {
- CLOVER_NOT_SUPPORTED_UNTIL("2.0");
- return CL_INVALID_VALUE;
+ const void *param_value) try {
+ auto &kern = obj(d_kern);
+ // Fine-grain system SVM must be available on every device in the
+ // kernel's context for the emulated code paths below to apply.
+ const bool has_system_svm = all_of(std::mem_fn(&device::has_system_svm),
+ kern.program().context().devices());
+
+ if (!param_value)
+ return CL_INVALID_VALUE;
+
+ switch (param_name) {
+ case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: {
+ if (param_value_size != sizeof(cl_bool))
+ return CL_INVALID_VALUE;
+
+ cl_bool val = *static_cast<const cl_bool*>(param_value);
+ // Requesting fine-grain system SVM is only an error when some device
+ // in the context cannot provide it.
+ if (val == CL_TRUE && !has_system_svm)
+ return CL_INVALID_OPERATION;
+ else
+ return CL_SUCCESS;
+ }
+
+ case CL_KERNEL_EXEC_INFO_SVM_PTRS:
+ // With system SVM any host pointer is usable by the kernel, so the
+ // supplied pointer list needs no further processing.
+ // NOTE(review): param_value_size is not validated here — the spec asks
+ // for a non-zero multiple of sizeof(void*); confirm whether that check
+ // belongs.
+ if (has_system_svm)
+ return CL_SUCCESS;
+
+ CLOVER_NOT_SUPPORTED_UNTIL("2.0");
+ return CL_INVALID_VALUE;
+
+ default:
+ return CL_INVALID_VALUE;
+ }
+
+} catch (error &e) {
+ return e.get();
}
namespace {
cl_mem_flags
- validate_flags(cl_mem d_parent, cl_mem_flags d_flags) {
+ // The new 'svm' argument selects the validation mode: regular buffers may
+ // carry host-ptr/host-access flags, SVM allocations may carry the SVM
+ // flags instead.
+ validate_flags(cl_mem d_parent, cl_mem_flags d_flags, bool svm) {
const cl_mem_flags dev_access_flags =
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
const cl_mem_flags host_ptr_flags =
CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
const cl_mem_flags host_access_flags =
CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
+ // Flags that are only meaningful for SVM allocations (clSVMAlloc).
+ const cl_mem_flags svm_flags =
+ CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS;
+
const cl_mem_flags valid_flags =
- dev_access_flags | host_access_flags | (d_parent ? 0 : host_ptr_flags);
+ dev_access_flags
+ | (svm || d_parent ? 0 : host_ptr_flags)
+ | (svm ? svm_flags : host_access_flags);
if ((d_flags & ~valid_flags) ||
util_bitcount(d_flags & dev_access_flags) > 1 ||
(d_flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
throw error(CL_INVALID_VALUE);
+ // The spec requires CL_MEM_SVM_FINE_GRAIN_BUFFER whenever
+ // CL_MEM_SVM_ATOMICS is requested.
+ if ((d_flags & CL_MEM_SVM_ATOMICS) &&
+ !(d_flags & CL_MEM_SVM_FINE_GRAIN_BUFFER))
+ throw error(CL_INVALID_VALUE);
+
if (d_parent) {
const auto &parent = obj(d_parent);
const cl_mem_flags flags = (d_flags |
CLOVER_API cl_mem
clCreateBuffer(cl_context d_ctx, cl_mem_flags d_flags, size_t size,
void *host_ptr, cl_int *r_errcode) try {
- const cl_mem_flags flags = validate_flags(NULL, d_flags);
+ // Regular buffers never carry the SVM-only flags, hence svm = false.
+ const cl_mem_flags flags = validate_flags(NULL, d_flags, false);
auto &ctx = obj(d_ctx);
if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
cl_buffer_create_type op,
const void *op_info, cl_int *r_errcode) try {
auto &parent = obj<root_buffer>(d_mem);
- const cl_mem_flags flags = validate_flags(d_mem, d_flags);
+ // Sub-buffers are not SVM allocations, hence svm = false.
+ const cl_mem_flags flags = validate_flags(d_mem, d_flags, false);
if (op == CL_BUFFER_CREATE_TYPE_REGION) {
auto reg = reinterpret_cast<const cl_buffer_region *>(op_info);
CL_MEM_COPY_HOST_PTR)))
throw error(CL_INVALID_HOST_PTR);
- const cl_mem_flags flags = validate_flags(desc->buffer, d_flags);
+ // Images are not SVM allocations, hence svm = false.
+ const cl_mem_flags flags = validate_flags(desc->buffer, d_flags, false);
if (!supported_formats(ctx, desc->image_type).count(*format))
throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
auto &ctx = obj(d_ctx);
auto formats = supported_formats(ctx, type);
- validate_flags(NULL, flags);
+ // Validation only; image format queries never involve SVM flags.
+ validate_flags(NULL, flags, false);
if (r_buf && !r_count)
throw error(CL_INVALID_VALUE);
buf.as_scalar<size_t>() = (sub ? sub->offset() : 0);
break;
}
+ case CL_MEM_USES_SVM_POINTER: {
+ // with system SVM all host ptrs are SVM pointers
+ // TODO: once we support devices with lower levels of SVM, we have to
+ // check the ptr in more detail
+ const bool system_svm = all_of(std::mem_fn(&device::has_system_svm),
+ mem.context().devices());
+ // Report CL_TRUE only when the buffer was created from a host pointer
+ // and every device in the context offers system SVM.
+ buf.as_scalar<cl_bool>() = mem.host_ptr() && system_svm;
+ break;
+ }
default:
throw error(CL_INVALID_VALUE);
}
clSVMAlloc(cl_context d_ctx,
cl_svm_mem_flags flags,
size_t size,
- unsigned int alignment) {
+ unsigned int alignment) try {
+ auto &ctx = obj(d_ctx);
+ // Throws on invalid SVM flag combinations; the catch below maps any
+ // error onto the documented nullptr failure return.
+ validate_flags(NULL, flags, true);
+
+ // presumably fold(minimum(), ...) computes the smallest
+ // max_mem_alloc_size across the context's devices — TODO confirm the
+ // helper's semantics.
+ if (!size ||
+ size > fold(minimum(), cl_ulong(ULONG_MAX),
+ map(std::mem_fn(&device::max_mem_alloc_size), ctx.devices())))
+ return nullptr;
+
+ if (!util_is_power_of_two_or_zero(alignment))
+ return nullptr;
+
+ if (!alignment)
+ alignment = 0x80; // sizeof(long16)
+
+ bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices());
+ if (can_emulate) {
+ // we can ignore all the flags as it's not required to honor them.
+ void *ptr = nullptr;
+ // posix_memalign requires alignment >= sizeof(void*); on failure ptr
+ // stays nullptr, which is exactly the failure value we return.
+ if (alignment < sizeof(void*))
+ alignment = sizeof(void*);
+ posix_memalign(&ptr, alignment, size);
+ return ptr;
+ }
+
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return nullptr;
+
+} catch (error &e) {
+ return nullptr;
}
CLOVER_API void
clSVMFree(cl_context d_ctx,
- void *svm_pointer) {
+ void *svm_pointer) try {
+ auto &ctx = obj(d_ctx);
+ bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices());
+
+ // free() matches the posix_memalign() allocation made by clSVMAlloc on
+ // the emulated path; free(nullptr) is a harmless no-op.
+ if (can_emulate)
+ return free(svm_pointer);
+
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
+
+// clSVMFree returns void, so errors (e.g. an invalid context) are
+// silently dropped here.
+} catch (error &e) {
}
#include <cstring>
+#include <cstdint>
+#include <vector>
+
+#include "util/bitscan.h"
+
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"
clEnqueueSVMFree(cl_command_queue d_q,
cl_uint num_svm_pointers,
void *svm_pointers[],
- void (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[], void *user_data),
+ void (CL_CALLBACK *pfn_free_func) (
+ cl_command_queue queue, cl_uint num_svm_pointers,
+ void *svm_pointers[], void *user_data),
void *user_data,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
- cl_event *event) {
- CLOVER_NOT_SUPPORTED_UNTIL("2.0");
- return CL_INVALID_VALUE;
+ cl_event *event) try {
+ // Either both or neither of num_svm_pointers/svm_pointers must be set.
+ if (bool(num_svm_pointers) != bool(svm_pointers))
+ return CL_INVALID_VALUE;
+
+ auto &q = obj(d_q);
+ bool can_emulate = q.device().has_system_svm();
+ auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
+
+ validate_common(q, deps);
+
+ // Snapshot the pointer array: the caller may reuse it as soon as we
+ // return, but the free action runs asynchronously.
+ std::vector<void *> svm_pointers_cpy(svm_pointers,
+ svm_pointers + num_svm_pointers);
+ if (!pfn_free_func) {
+ if (!can_emulate) {
+ CLOVER_NOT_SUPPORTED_UNTIL("2.0");
+ return CL_INVALID_VALUE;
+ }
+ // Default action: free() each pointer, matching clSVMAlloc's
+ // posix_memalign on the emulated path.
+ pfn_free_func = [](cl_command_queue, cl_uint num_svm_pointers,
+ void *svm_pointers[], void *) {
+ for (void *p : range(svm_pointers, num_svm_pointers))
+ free(p);
+ };
+ }
+
+ auto hev = create<hard_event>(q, CL_COMMAND_SVM_FREE, deps,
+ [=](clover::event &) mutable {
+ pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
+ user_data);
+ });
+
+ ret_object(event, hev);
+ return CL_SUCCESS;
+
+} catch (error &e) {
+ return e.get();
}
CLOVER_API cl_int
size_t size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
- cl_event *event) {
+ cl_event *event) try {
+
+ if (dst_ptr == nullptr || src_ptr == nullptr)
+ return CL_INVALID_VALUE;
+
+ // Regions overlap when the distance between the pointers is smaller
+ // than the copy size. Use unsigned uintptr_t arithmetic here: calling
+ // abs() on the ptrdiff_t difference can bind to the int overload and
+ // truncate a 64-bit pointer difference, corrupting the overlap check
+ // for pointers that are far apart.
+ const uintptr_t dst_addr = reinterpret_cast<uintptr_t>(dst_ptr);
+ const uintptr_t src_addr = reinterpret_cast<uintptr_t>(src_ptr);
+ if ((dst_addr > src_addr ? dst_addr - src_addr
+ : src_addr - dst_addr) < size)
+ return CL_MEM_COPY_OVERLAP;
+
+ auto &q = obj(d_q);
+ bool can_emulate = q.device().has_system_svm();
+ auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
+
+ validate_common(q, deps);
+
+ if (can_emulate) {
+ auto hev = create<hard_event>(q, CL_COMMAND_SVM_MEMCPY, deps,
+ [=](clover::event &) {
+ memcpy(dst_ptr, src_ptr, size);
+ });
+
+ // A blocking copy must not return until the memcpy has completed.
+ if (blocking_copy)
+ hev().wait();
+ ret_object(event, hev);
+ return CL_SUCCESS;
+ }
+
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
+
+} catch (error &e) {
+ return e.get();
}
CLOVER_API cl_int
size_t size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
- cl_event *event) {
+ cl_event *event) try {
+ // pattern_size must be a power of two no larger than 128, svm_ptr must
+ // be aligned to it, and size must be an exact multiple of it.
+ if (svm_ptr == nullptr || pattern == nullptr ||
+ !util_is_power_of_two_nonzero(pattern_size) ||
+ pattern_size > 128 ||
+ !ptr_is_aligned(svm_ptr, pattern_size) ||
+ size % pattern_size)
+ return CL_INVALID_VALUE;
+
+ auto &q = obj(d_q);
+ bool can_emulate = q.device().has_system_svm();
+ auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
+
+ validate_common(q, deps);
+
+ if (can_emulate) {
+ // The spec allows the caller to reuse or free the pattern buffer as
+ // soon as this call returns, but the fill runs asynchronously — so
+ // snapshot the pattern into storage owned by the event action instead
+ // of capturing the caller's raw pointer.
+ std::vector<uint8_t> pattern_cpy(
+ static_cast<const uint8_t*>(pattern),
+ static_cast<const uint8_t*>(pattern) + pattern_size);
+ auto hev = create<hard_event>(q, CL_COMMAND_SVM_MEMFILL, deps,
+ [=](clover::event &) {
+ void *ptr = svm_ptr;
+ for (size_t s = size; s; s -= pattern_size) {
+ memcpy(ptr, pattern_cpy.data(), pattern_size);
+ ptr = static_cast<uint8_t*>(ptr) + pattern_size;
+ }
+ });
+
+ ret_object(event, hev);
+ return CL_SUCCESS;
+ }
+
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
+
+} catch (error &e) {
+ return e.get();
}
CLOVER_API cl_int
size_t size,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
- cl_event *event) {
+ cl_event *event) try {
+
+ if (svm_ptr == nullptr || size == 0)
+ return CL_INVALID_VALUE;
+
+ auto &q = obj(d_q);
+ bool can_emulate = q.device().has_system_svm();
+ auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
+
+ validate_common(q, deps);
+
+ if (can_emulate) {
+ // With fine-grain system SVM the host can access the memory directly,
+ // so mapping needs no work; an event is still created so callers can
+ // synchronize on it.
+ auto hev = create<hard_event>(q, CL_COMMAND_SVM_MAP, deps,
+ [](clover::event &) { });
+
+ ret_object(event, hev);
+ return CL_SUCCESS;
+ }
+
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
+
+} catch (error &e) {
+ return e.get();
}
CLOVER_API cl_int
void *svm_ptr,
cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
- cl_event *event) {
+ cl_event *event) try {
+
+ if (svm_ptr == nullptr)
+ return CL_INVALID_VALUE;
+
+ auto &q = obj(d_q);
+ bool can_emulate = q.device().has_system_svm();
+ auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
+
+ validate_common(q, deps);
+
+ if (can_emulate) {
+ // Unmapping is likewise a no-op under system SVM; the event exists
+ // only for synchronization.
+ auto hev = create<hard_event>(q, CL_COMMAND_SVM_UNMAP, deps,
+ [](clover::event &) { });
+
+ ret_object(event, hev);
+ return CL_SUCCESS;
+ }
+
CLOVER_NOT_SUPPORTED_UNTIL("2.0");
return CL_INVALID_VALUE;
+
+} catch (error &e) {
+ return e.get();
}
CLOVER_API cl_int
// and SVM pointer into the same kernel at the same time.
if (pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) &&
pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM))
- return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
+ // we can emulate all lower levels if we support fine grain system
+ return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
+ CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
+ CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
return 0;
}
clover::platform &platform;
+ /// True when the device reports fine-grain system SVM support, i.e.
+ /// plain host pointers are directly usable by kernels.
+ inline bool
+ has_system_svm() const {
+ return svm_support() & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
+ }
+
private:
pipe_screen *pipe;
pipe_loader_device *ldev;
#include <atomic>
namespace clover {
+ ///
+ /// Some helper functions for raw pointer operations
+ ///
+ // ptr_is_aligned: true when ptr is aligned to 'a' bytes. 'a' is expected
+ // to be a non-zero power of two — the assert checks a == (a & -a), which
+ // holds for powers of two (and for 0, which callers are presumed to
+ // exclude).
+ template <class T>
+ static bool
+ ptr_is_aligned(const T *ptr, uintptr_t a) noexcept {
+ assert(a == (a & -a));
+ uintptr_t ptr_value = reinterpret_cast<uintptr_t>(ptr);
+ return (ptr_value & (a - 1)) == 0;
+ }
+
///
/// Base class for objects that support reference counting.
///