clover: implement SVM functions for devices with fine grained system SVM support
author Karol Herbst <kherbst@redhat.com>
Wed, 22 May 2019 20:34:09 +0000 (22:34 +0200)
committer Marge Bot <eric+marge@anholt.net>
Wed, 15 Apr 2020 11:08:13 +0000 (11:08 +0000)
all of the functionality can be mapped to malloc/free if the device supports
fine grained system SVM.

v2: fix some API bugs found with the OpenCL CTS
v3: remove validate_event_wait_list
    improve implementation of clSetKernelExecInfo
    make clEnqueueSVMFree spec compliant
    rename can_emulate_non_system_svm to has_system_svm and make it a member method
    improve validation in clEnqueueSVMMemFill
    handle CL_MEM_USES_SVM_POINTER in clGetMemObjectInfo
v4: break long lines and other minor cosmetic adjustments

Signed-off-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Francisco Jerez <currojerez@riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2076>

src/gallium/state_trackers/clover/api/kernel.cpp
src/gallium/state_trackers/clover/api/memory.cpp
src/gallium/state_trackers/clover/api/transfer.cpp
src/gallium/state_trackers/clover/core/device.cpp
src/gallium/state_trackers/clover/core/device.hpp
src/gallium/state_trackers/clover/util/pointer.hpp

index 38a5cc10454dba0fb870591aef26b3e50859cf57..962a2b59e16f5d7349befede05b8b17f5dc79a89 100644 (file)
@@ -352,7 +352,37 @@ CLOVER_API cl_int
 clSetKernelExecInfo(cl_kernel d_kern,
                     cl_kernel_exec_info param_name,
                     size_t param_value_size,
-                    const void *param_value) {
-   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
-   return CL_INVALID_VALUE;
+                    const void *param_value) try { // validates SVM exec-info requests; nothing is stored on the kernel here
+   auto &kern = obj(d_kern);
+   const bool has_system_svm = all_of(std::mem_fn(&device::has_system_svm), // checked across the devices of the kernel's context
+                                      kern.program().context().devices());
+
+   if (!param_value) // a null value is rejected for every param_name
+      return CL_INVALID_VALUE;
+
+   switch (param_name) {
+   case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: {
+      if (param_value_size != sizeof(cl_bool)) // the payload must be exactly one cl_bool
+         return CL_INVALID_VALUE;
+
+      cl_bool val = *static_cast<const cl_bool*>(param_value);
+      if (val == CL_TRUE && !has_system_svm) // cannot enable system SVM without device support
+         return CL_INVALID_OPERATION;
+      else
+         return CL_SUCCESS;
+   }
+
+   case CL_KERNEL_EXEC_INFO_SVM_PTRS:
+      if (has_system_svm) // accepted without inspecting or recording the pointer list
+         return CL_SUCCESS;
+
+      CLOVER_NOT_SUPPORTED_UNTIL("2.0");
+      return CL_INVALID_VALUE;
+
+   default: // any other param_name is invalid
+      return CL_INVALID_VALUE;
+   }
+
+} catch (error &e) { // report a thrown clover error as its e.get() code
+   return e.get();
 }
index 6a0717680cfaffd6391c640551c6e09308e6470e..107815bde721aaeaf480dac5c544b7dbab37eca1 100644 (file)
@@ -29,15 +29,20 @@ using namespace clover;
 
 namespace {
    cl_mem_flags
-   validate_flags(cl_mem d_parent, cl_mem_flags d_flags) {
+   validate_flags(cl_mem d_parent, cl_mem_flags d_flags, bool svm) {
       const cl_mem_flags dev_access_flags =
          CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY;
       const cl_mem_flags host_ptr_flags =
          CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR;
       const cl_mem_flags host_access_flags =
          CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
+      const cl_mem_flags svm_flags =
+         CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS;
+
       const cl_mem_flags valid_flags =
-         dev_access_flags | host_access_flags | (d_parent ? 0 : host_ptr_flags);
+         dev_access_flags
+            | (svm || d_parent ? 0 : host_ptr_flags)
+            | (svm ? svm_flags : host_access_flags);
 
       if ((d_flags & ~valid_flags) ||
           util_bitcount(d_flags & dev_access_flags) > 1 ||
@@ -48,6 +53,10 @@ namespace {
           (d_flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)))
          throw error(CL_INVALID_VALUE);
 
+      if ((d_flags & CL_MEM_SVM_ATOMICS) &&
+          !(d_flags & CL_MEM_SVM_FINE_GRAIN_BUFFER))
+         throw error(CL_INVALID_VALUE);
+
       if (d_parent) {
          const auto &parent = obj(d_parent);
          const cl_mem_flags flags = (d_flags |
@@ -77,7 +86,7 @@ namespace {
 CLOVER_API cl_mem
 clCreateBuffer(cl_context d_ctx, cl_mem_flags d_flags, size_t size,
                void *host_ptr, cl_int *r_errcode) try {
-   const cl_mem_flags flags = validate_flags(NULL, d_flags);
+   const cl_mem_flags flags = validate_flags(NULL, d_flags, false);
    auto &ctx = obj(d_ctx);
 
    if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR |
@@ -103,7 +112,7 @@ clCreateSubBuffer(cl_mem d_mem, cl_mem_flags d_flags,
                   cl_buffer_create_type op,
                   const void *op_info, cl_int *r_errcode) try {
    auto &parent = obj<root_buffer>(d_mem);
-   const cl_mem_flags flags = validate_flags(d_mem, d_flags);
+   const cl_mem_flags flags = validate_flags(d_mem, d_flags, false);
 
    if (op == CL_BUFFER_CREATE_TYPE_REGION) {
       auto reg = reinterpret_cast<const cl_buffer_region *>(op_info);
@@ -163,7 +172,7 @@ clCreateImage(cl_context d_ctx, cl_mem_flags d_flags,
                                          CL_MEM_COPY_HOST_PTR)))
       throw error(CL_INVALID_HOST_PTR);
 
-   const cl_mem_flags flags = validate_flags(desc->buffer, d_flags);
+   const cl_mem_flags flags = validate_flags(desc->buffer, d_flags, false);
 
    if (!supported_formats(ctx, desc->image_type).count(*format))
       throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED);
@@ -249,7 +258,7 @@ clGetSupportedImageFormats(cl_context d_ctx, cl_mem_flags flags,
    auto &ctx = obj(d_ctx);
    auto formats = supported_formats(ctx, type);
 
-   validate_flags(NULL, flags);
+   validate_flags(NULL, flags, false);
 
    if (r_buf && !r_count)
       throw error(CL_INVALID_VALUE);
@@ -313,6 +322,15 @@ clGetMemObjectInfo(cl_mem d_mem, cl_mem_info param,
       buf.as_scalar<size_t>() = (sub ? sub->offset() : 0);
       break;
    }
+   case CL_MEM_USES_SVM_POINTER: {
+      // with system SVM all host ptrs are SVM pointers
+      // TODO: once we support devices with lower levels of SVM, we have to
+      // check the ptr in more detail
+      const bool system_svm = all_of(std::mem_fn(&device::has_system_svm),
+                                     mem.context().devices());
+      buf.as_scalar<cl_bool>() = mem.host_ptr() && system_svm;
+      break;
+   }
    default:
       throw error(CL_INVALID_VALUE);
    }
@@ -431,13 +449,48 @@ CLOVER_API void *
 clSVMAlloc(cl_context d_ctx,
            cl_svm_mem_flags flags,
            size_t size,
-           unsigned int alignment) {
+           unsigned int alignment) try { // allocates SVM memory; every failure path below yields nullptr
+   auto &ctx = obj(d_ctx);
+   validate_flags(NULL, flags, true); // svm=true; invalid flags throw and end up in the catch below
+
+   if (!size || // zero-sized requests are rejected
+       size > fold(minimum(), cl_ulong(ULONG_MAX), // presumably the smallest max_mem_alloc_size among the context's devices
+                   map(std::mem_fn(&device::max_mem_alloc_size), ctx.devices())))
+      return nullptr;
+
+   if (!util_is_power_of_two_or_zero(alignment)) // zero is allowed and means "use the default" below
+      return nullptr;
+
+   if (!alignment)
+      alignment = 0x80; // default alignment: sizeof(long16), i.e. 128 bytes
+
+   bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices());
+   if (can_emulate) {
+      // we can ignore all the flags as it's not required to honor them.
+      void *ptr = nullptr;
+      if (alignment < sizeof(void*)) // raise small alignments to pointer size before calling posix_memalign
+         alignment = sizeof(void*);
+      posix_memalign(&ptr, alignment, size); // NOTE(review): return value ignored; relies on ptr staying nullptr on failure - confirm
+      return ptr;
+   }
+
     CLOVER_NOT_SUPPORTED_UNTIL("2.0");
     return nullptr;
+
+} catch (error &e) { // no error-code output in this signature, so errors collapse to nullptr
+   return nullptr;
 }
 
 CLOVER_API void
 clSVMFree(cl_context d_ctx,
-          void *svm_pointer) {
+          void *svm_pointer) try { // releases svm_pointer with free() when the context's devices have system SVM
+   auto &ctx = obj(d_ctx);
+   bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices());
+
+   if (can_emulate)
+      return free(svm_pointer); // counterpart of the posix_memalign() call in clSVMAlloc
+
    CLOVER_NOT_SUPPORTED_UNTIL("2.0");
+
+} catch (error &e) { // the function returns void, so a thrown clover error is dropped here
 }
index 879b8a9d762e8c02875e90a2c03f88e7fe51c130..d1f540769dd419d80258e68a4a0d0b07e8842afa 100644 (file)
@@ -22,6 +22,8 @@
 
 #include <cstring>
 
+#include "util/bitscan.h"
+
 #include "api/util.hpp"
 #include "core/event.hpp"
 #include "core/memory.hpp"
@@ -769,13 +771,47 @@ CLOVER_API cl_int
 clEnqueueSVMFree(cl_command_queue d_q,
                  cl_uint num_svm_pointers,
                  void *svm_pointers[],
-                 void (CL_CALLBACK *pfn_free_func) (cl_command_queue queue, cl_uint num_svm_pointers, void *svm_pointers[], void *user_data),
+                 void (CL_CALLBACK *pfn_free_func) ( // optional; a free()-based default is installed below when null
+                    cl_command_queue queue, cl_uint num_svm_pointers,
+                    void *svm_pointers[], void *user_data),
                  void *user_data,
                  cl_uint num_events_in_wait_list,
                  const cl_event *event_wait_list,
-                 cl_event *event) {
-   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
-   return CL_INVALID_VALUE;
+                 cl_event *event) try { // creates an SVM_FREE event whose action hands the pointers to the free callback
+   if (bool(num_svm_pointers) != bool(svm_pointers)) // count and array must both be set or both be empty
+      return CL_INVALID_VALUE;
+
+   auto &q = obj(d_q);
+   bool can_emulate = q.device().has_system_svm();
+   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
+
+   validate_common(q, deps);
+
+   std::vector<void *> svm_pointers_cpy(svm_pointers, // private copy: the by-value lambda below captures it, not the caller's array
+                                        svm_pointers + num_svm_pointers);
+   if (!pfn_free_func) {
+      if (!can_emulate) { // no user callback and no system SVM: there is no way to free here
+         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
+         return CL_INVALID_VALUE;
+      }
+      pfn_free_func = [](cl_command_queue, cl_uint num_svm_pointers, // captureless lambda, so it converts to the callback pointer type
+                         void *svm_pointers[], void *) {
+         for (void *p : range(svm_pointers, num_svm_pointers))
+            free(p);
+      };
+   }
+
+   auto hev = create<hard_event>(q, CL_COMMAND_SVM_FREE, deps,
+      [=](clover::event &) mutable { // mutable: data() on the captured vector must yield a non-const void **
+         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
+                       user_data);
+      });
+
+   ret_object(event, hev);
+   return CL_SUCCESS;
+
+} catch (error &e) { // report a thrown clover error as its e.get() code
+   return e.get();
 }
 
 CLOVER_API cl_int
@@ -786,9 +822,38 @@ clEnqueueSVMMemcpy(cl_command_queue d_q,
                    size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
-                   cl_event *event) {
+                   cl_event *event) try {
+
+   if (dst_ptr == nullptr || src_ptr == nullptr)
+      return CL_INVALID_VALUE;
+
+   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
+                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
+      return CL_MEM_COPY_OVERLAP;
+
+   auto &q = obj(d_q);
+   bool can_emulate = q.device().has_system_svm();
+   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
+
+   validate_common(q, deps);
+
+   if (can_emulate) {
+      auto hev = create<hard_event>(q, CL_COMMAND_SVM_MEMCPY, deps,
+         [=](clover::event &) {
+            memcpy(dst_ptr, src_ptr, size);
+         });
+
+      if (blocking_copy)
+         hev().wait();
+      ret_object(event, hev);
+      return CL_SUCCESS;
+   }
+
    CLOVER_NOT_SUPPORTED_UNTIL("2.0");
    return CL_INVALID_VALUE;
+
+} catch (error &e) {
+   return e.get();
 }
 
 CLOVER_API cl_int
@@ -799,9 +864,39 @@ clEnqueueSVMMemFill(cl_command_queue d_q,
                     size_t size,
                     cl_uint num_events_in_wait_list,
                     const cl_event *event_wait_list,
-                    cl_event *event) {
+                    cl_event *event) try {
+   if (svm_ptr == nullptr || pattern == nullptr ||
+       !util_is_power_of_two_nonzero(pattern_size) ||
+       pattern_size > 128 ||
+       !ptr_is_aligned(svm_ptr, pattern_size) ||
+       size % pattern_size)
+      return CL_INVALID_VALUE;
+
+   auto &q = obj(d_q);
+   bool can_emulate = q.device().has_system_svm();
+   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
+
+   validate_common(q, deps);
+
+   if (can_emulate) {
+      auto hev = create<hard_event>(q, CL_COMMAND_SVM_MEMFILL, deps,
+         [=](clover::event &) {
+            void *ptr = svm_ptr;
+            for (size_t s = size; s; s -= pattern_size) {
+               memcpy(ptr, pattern, pattern_size);
+               ptr = static_cast<uint8_t*>(ptr) + pattern_size;
+            }
+         });
+
+      ret_object(event, hev);
+      return CL_SUCCESS;
+   }
+
    CLOVER_NOT_SUPPORTED_UNTIL("2.0");
    return CL_INVALID_VALUE;
+
+} catch (error &e) {
+   return e.get();
 }
 
 CLOVER_API cl_int
@@ -812,9 +907,30 @@ clEnqueueSVMMap(cl_command_queue d_q,
                 size_t size,
                 cl_uint num_events_in_wait_list,
                 const cl_event *event_wait_list,
-                cl_event *event) {
+                cl_event *event) try {
+
+   if (svm_ptr == nullptr || size == 0)
+      return CL_INVALID_VALUE;
+
+   auto &q = obj(d_q);
+   bool can_emulate = q.device().has_system_svm();
+   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
+
+   validate_common(q, deps);
+
+   if (can_emulate) {
+      auto hev = create<hard_event>(q, CL_COMMAND_SVM_MAP, deps,
+         [](clover::event &) { });
+
+      ret_object(event, hev);
+      return CL_SUCCESS;
+   }
+
    CLOVER_NOT_SUPPORTED_UNTIL("2.0");
    return CL_INVALID_VALUE;
+
+} catch (error &e) {
+   return e.get();
 }
 
 CLOVER_API cl_int
@@ -822,9 +938,30 @@ clEnqueueSVMUnmap(cl_command_queue d_q,
                   void *svm_ptr,
                   cl_uint num_events_in_wait_list,
                   const cl_event *event_wait_list,
-                  cl_event *event) {
+                  cl_event *event) try {
+
+   if (svm_ptr == nullptr)
+      return CL_INVALID_VALUE;
+
+   auto &q = obj(d_q);
+   bool can_emulate = q.device().has_system_svm();
+   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);
+
+   validate_common(q, deps);
+
+   if (can_emulate) {
+      auto hev = create<hard_event>(q, CL_COMMAND_SVM_UNMAP, deps,
+         [](clover::event &) { });
+
+      ret_object(event, hev);
+      return CL_SUCCESS;
+   }
+
    CLOVER_NOT_SUPPORTED_UNTIL("2.0");
    return CL_INVALID_VALUE;
+
+} catch (error &e) {
+   return e.get();
 }
 
 CLOVER_API cl_int
index e05dc5621899eb95f52bc850e5d3c510aa511548..9a81ac61fae3a8609f1addde277dfc4f42beafc0 100644 (file)
@@ -239,7 +239,10 @@ device::svm_support() const {
    // and SVM pointer into the same kernel at the same time.
    if (pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) &&
        pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM))
-      return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
+      // we can emulate all lower levels if we support fine grain system
+      return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |
+             CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |
+             CL_DEVICE_SVM_FINE_GRAIN_BUFFER;
    return 0;
 }
 
index dc9064bb6384072e25f949360b4afd293497f7f5..597f9489b2c4618e52cb63334c5c2e6d8a935db4 100644 (file)
@@ -95,6 +95,11 @@ namespace clover {
 
       clover::platform &platform;
 
+      inline bool // true when svm_support() has the CL_DEVICE_SVM_FINE_GRAIN_SYSTEM bit set
+      has_system_svm() const {
+         return svm_support() & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM;
+      }
+
    private:
       pipe_screen *pipe;
       pipe_loader_device *ldev;
index aa119a4b82b655e466e47797b6e4f5ef65c2e5ab..7bb9951aef6c4f927f74183449a86da4bae1f272 100644 (file)
 #include <atomic>
 
 namespace clover {
+   ///
+   /// Raw pointer helper: is the address of ptr a multiple of a?
+   ///
+   template <class T>
+   static bool
+   ptr_is_aligned(const T *ptr, uintptr_t a) noexcept {
+      assert(a == (a & -a)); // a & -a keeps only the lowest set bit, so this holds when a has at most one bit set
+      uintptr_t ptr_value = reinterpret_cast<uintptr_t>(ptr);
+      return (ptr_value & (a - 1)) == 0; // aligned when all address bits below a are clear
+   }
+
    ///
    /// Base class for objects that support reference counting.
    ///