turnip: Properly return VK_DEVICE_LOST on queuesubmit failures.
author: Eric Anholt <eric@anholt.net>
Wed, 17 Jun 2020 22:58:33 +0000 (15:58 -0700)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 26 Jun 2020 19:34:17 +0000 (19:34 +0000)
The device lost support closely matches the anv code for the same.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2769>

src/freedreno/vulkan/tu_device.c
src/freedreno/vulkan/tu_fence.c
src/freedreno/vulkan/tu_private.h
src/freedreno/vulkan/tu_query.c

diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index dd046f39eefede1aeb79dc059fc98da2f1d35bdf..57905740eefef831f00d32fcd75937a06ebf36d5 100644
@@ -1216,6 +1216,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    device->instance = physical_device->instance;
    device->physical_device = physical_device;
+   device->_lost = false;
 
    if (pAllocator)
       device->alloc = *pAllocator;
@@ -1365,6 +1366,29 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    vk_free(&device->alloc, device);
 }
 
+VkResult
+_tu_device_set_lost(struct tu_device *device,
+                    const char *file, int line,
+                    const char *msg, ...)
+{
+   /* Set the flag indicating that waits should return in finite time even
+    * after device loss.
+    */
+   p_atomic_inc(&device->_lost);
+
+   /* TODO: Report the log message through VkDebugReportCallbackEXT instead */
+   fprintf(stderr, "%s:%d: ", file, line);
+   va_list ap;
+   va_start(ap, msg);
+   vfprintf(stderr, msg, ap);
+   va_end(ap);
+
+   if (env_var_as_boolean("TU_ABORT_ON_DEVICE_LOSS", false))
+      abort();
+
+   return VK_ERROR_DEVICE_LOST;
+}
+
 VkResult
 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
 {
@@ -1547,18 +1571,17 @@ tu_QueueSubmit(VkQueue _queue,
                                          pSubmits[i].waitSemaphoreCount,
                                          false, &in_syncobjs, &nr_in_syncobjs);
       if (result != VK_SUCCESS) {
-         /* TODO: emit VK_ERROR_DEVICE_LOST */
-         fprintf(stderr, "failed to allocate space for semaphore submission\n");
-         abort();
+         return tu_device_set_lost(queue->device,
+                                   "failed to allocate space for semaphore submission\n");
       }
 
       result = tu_get_semaphore_syncobjs(pSubmits[i].pSignalSemaphores,
                                          pSubmits[i].signalSemaphoreCount,
                                          false, &out_syncobjs, &nr_out_syncobjs);
       if (result != VK_SUCCESS) {
-         /* TODO: emit VK_ERROR_DEVICE_LOST */
-         fprintf(stderr, "failed to allocate space for semaphore submission\n");
-         abort();
+         free(in_syncobjs);
+         return tu_device_set_lost(queue->device,
+                                   "failed to allocate space for semaphore submission\n");
       }
 
       uint32_t entry_count = 0;
@@ -1617,8 +1640,10 @@ tu_QueueSubmit(VkQueue _queue,
                                     DRM_MSM_GEM_SUBMIT,
                                     &req, sizeof(req));
       if (ret) {
-         fprintf(stderr, "submit failed: %s\n", strerror(errno));
-         abort();
+         free(in_syncobjs);
+         free(out_syncobjs);
+         return tu_device_set_lost(queue->device, "submit failed: %s\n",
+                                   strerror(errno));
       }
 
       tu_bo_list_destroy(&bo_list);
@@ -1648,6 +1673,9 @@ tu_QueueWaitIdle(VkQueue _queue)
 {
    TU_FROM_HANDLE(tu_queue, queue, _queue);
 
+   if (tu_device_is_lost(queue->device))
+      return VK_ERROR_DEVICE_LOST;
+
    tu_fence_wait_idle(&queue->submit_fence);
 
    return VK_SUCCESS;
@@ -1658,6 +1686,9 @@ tu_DeviceWaitIdle(VkDevice _device)
 {
    TU_FROM_HANDLE(tu_device, device, _device);
 
+   if (tu_device_is_lost(device))
+      return VK_ERROR_DEVICE_LOST;
+
    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
       for (unsigned q = 0; q < device->queue_count[i]; q++) {
          tu_QueueWaitIdle(tu_queue_to_handle(&device->queues[i][q]));
diff --git a/src/freedreno/vulkan/tu_fence.c b/src/freedreno/vulkan/tu_fence.c
index 62094c91092c561c85b135eb4968c83c6f90983f..f3de4ee35f2d888403277fa34cadc2001ef71702 100644
@@ -336,6 +336,9 @@ tu_WaitForFences(VkDevice _device,
 {
    TU_FROM_HANDLE(tu_device, device, _device);
 
+   if (tu_device_is_lost(device))
+      return VK_ERROR_DEVICE_LOST;
+
    /* add a simpler path for when fenceCount == 1? */
 
    struct pollfd stack_fds[8];
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index a70bb3b7dce5ca205e0b51886faf57626092a709..0d59c3c065944c41546482b7e9edabf11238764d 100644
@@ -47,6 +47,7 @@
 #include "main/macros.h"
 #include "util/list.h"
 #include "util/macros.h"
+#include "util/u_atomic.h"
 #include "vk_alloc.h"
 #include "vk_debug_report.h"
 #include "wsi_common.h"
@@ -350,6 +351,7 @@ struct tu_device
    int queue_count[TU_MAX_QUEUE_FAMILIES];
 
    struct tu_physical_device *physical_device;
+   int _lost;
 
    struct ir3_compiler *compiler;
 
@@ -377,6 +379,18 @@ struct tu_device
    struct tu_device_extension_table enabled_extensions;
 };
 
+VkResult _tu_device_set_lost(struct tu_device *device,
+                             const char *file, int line,
+                             const char *msg, ...) PRINTFLIKE(4, 5);
+#define tu_device_set_lost(dev, ...) \
+   _tu_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
+
+static inline bool
+tu_device_is_lost(struct tu_device *device)
+{
+   return unlikely(p_atomic_read(&device->_lost));
+}
+
 VkResult
 tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size);
 VkResult
diff --git a/src/freedreno/vulkan/tu_query.c b/src/freedreno/vulkan/tu_query.c
index 6a106a40614e55b5cc1ed8e1cc044def14a4be8d..143f144d1384e06d900744efe9b37a3701a19bfa 100644
@@ -311,6 +311,9 @@ tu_GetQueryPoolResults(VkDevice _device,
    TU_FROM_HANDLE(tu_query_pool, pool, queryPool);
    assert(firstQuery + queryCount <= pool->size);
 
+   if (tu_device_is_lost(device))
+      return VK_ERROR_DEVICE_LOST;
+
    switch (pool->type) {
    case VK_QUERY_TYPE_OCCLUSION:
    case VK_QUERY_TYPE_TIMESTAMP: