iris: propagate error from gen_perf_begin_query to glBeginPerfQueryINTEL

[mesa.git] / src / gallium / drivers / iris / iris_fence.c
diff --git a/src/gallium/drivers/iris/iris_fence.c b/src/gallium/drivers/iris/iris_fence.c

index c43c892bb87cee5650bdd212fdd4f6fb60e3d1a9..4600c1c4238231b0709234b75c09f757b3189344 100644 (file)
--- a/src/gallium/drivers/iris/iris_fence.c
+++ b/src/gallium/drivers/iris/iris_fence.c
@@ -26,8 +26,8 @@
   * Fences for driver and IPC serialisation, scheduling and synchronisation.
   */
  
-#include <linux/sync_file.h>
-
+#include "drm-uapi/sync_file.h"
+#include "util/u_debug.h"
  #include "util/u_inlines.h"
  #include "intel/common/gen_gem.h"
  
@@ -114,7 +114,10 @@ iris_batch_add_syncobj(struct iris_batch *batch,
  
  struct pipe_fence_handle {
     struct pipe_reference ref;
-   struct iris_seqno *seqno[IRIS_BATCH_COUNT];
+
+   struct pipe_context *unflushed_ctx;
+
+   struct iris_fine_fence *fine[IRIS_BATCH_COUNT];
  };
  
  static void
@@ -123,8 +126,8 @@ iris_fence_destroy(struct pipe_screen *p_screen,
  {
     struct iris_screen *screen = (struct iris_screen *)p_screen;
  
-   for (unsigned i = 0; i < ARRAY_SIZE(fence->seqno); i++)
-      iris_seqno_reference(screen, &fence->seqno[i], NULL);
+   for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++)
+      iris_fine_fence_reference(screen, &fence->fine[i], NULL);
  
     free(fence);
  }
@@ -170,6 +173,14 @@ iris_fence_flush(struct pipe_context *ctx,
     struct iris_screen *screen = (void *) ctx->screen;
     struct iris_context *ice = (struct iris_context *)ctx;
  
+   /* We require DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (kernel 5.2+) for
+    * deferred flushes.  Just ignore the request to defer on older kernels.
+    */
+   if (!(screen->kernel_features & KERNEL_HAS_WAIT_FOR_SUBMIT))
+      flags &= ~PIPE_FLUSH_DEFERRED;
+
+   const bool deferred = flags & PIPE_FLUSH_DEFERRED;
+
     if (flags & PIPE_FLUSH_END_OF_FRAME) {
        ice->frame++;
  
@@ -181,9 +192,10 @@ iris_fence_flush(struct pipe_context *ctx,
        }
     }
  
-   /* XXX PIPE_FLUSH_DEFERRED */
-   for (unsigned i = 0; i < IRIS_BATCH_COUNT; i++)
-      iris_batch_flush(&ice->batches[i]);
+   if (!deferred) {
+      for (unsigned i = 0; i < IRIS_BATCH_COUNT; i++)
+         iris_batch_flush(&ice->batches[i]);
+   }
  
     if (!out_fence)
        return;
@@ -194,13 +206,27 @@ iris_fence_flush(struct pipe_context *ctx,
  
     pipe_reference_init(&fence->ref, 1);
  
+   if (deferred)
+      fence->unflushed_ctx = ctx;
+
     for (unsigned b = 0; b < IRIS_BATCH_COUNT; b++) {
        struct iris_batch *batch = &ice->batches[b];
  
-      if (iris_seqno_signaled(batch->last_seqno))
-         continue;
+      if (deferred && iris_batch_bytes_used(batch) > 0) {
+         struct iris_fine_fence *fine =
+            iris_fine_fence_new(batch, IRIS_FENCE_BOTTOM_OF_PIPE);
+         iris_fine_fence_reference(screen, &fence->fine[b], fine);
+         iris_fine_fence_reference(screen, &fine, NULL);
+      } else {
+         /* This batch has no commands queued up (perhaps we just flushed,
+          * or all the commands are on the other batch).  Wait for the last
+          * syncobj on this engine - unless it's already finished by now.
+          */
+         if (iris_fine_fence_signaled(batch->last_fence))
+            continue;
  
-      iris_seqno_reference(screen, &fence->seqno[b], batch->last_seqno);
+         iris_fine_fence_reference(screen, &fence->fine[b], batch->last_fence);
+      }
     }
  
     iris_fence_reference(ctx->screen, out_fence, NULL);
@@ -213,16 +239,37 @@ iris_fence_await(struct pipe_context *ctx,
  {
     struct iris_context *ice = (struct iris_context *)ctx;
  
+   /* Unflushed fences from the same context are no-ops. */
+   if (ctx && ctx == fence->unflushed_ctx)
+      return;
+
+   /* XXX: We can't safely flush the other context, because it might be
+    *      bound to another thread, and poking at its internals wouldn't
+    *      be safe.  In the future we should use MI_SEMAPHORE_WAIT and
+    *      block until the other job has been submitted, relying on
+    *      kernel timeslicing to preempt us until the other job is
+    *      actually flushed and the seqno finally passes.
+    */
+   if (fence->unflushed_ctx) {
+      pipe_debug_message(&ice->dbg, CONFORMANCE, "%s",
+                         "glWaitSync on unflushed fence from another context "
+                         "is unlikely to work without kernel 5.8+\n");
+   }
+
+   /* Flush any current work in our context as it doesn't need to wait
+    * for this fence.  Any future work in our context must wait.
+    */
     for (unsigned b = 0; b < IRIS_BATCH_COUNT; b++) {
        struct iris_batch *batch = &ice->batches[b];
  
-      for (unsigned i = 0; i < ARRAY_SIZE(fence->seqno); i++) {
-         struct iris_seqno *seqno = fence->seqno[i];
+      for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
+         struct iris_fine_fence *fine = fence->fine[i];
  
-         if (iris_seqno_signaled(seqno))
+         if (iris_fine_fence_signaled(fine))
              continue;
  
-         iris_batch_add_syncobj(batch, seqno->syncobj, I915_EXEC_FENCE_WAIT);
+         iris_batch_flush(batch);
+         iris_batch_add_syncobj(batch, fine->syncobj, I915_EXEC_FENCE_WAIT);
        }
     }
  }
@@ -259,17 +306,41 @@ iris_fence_finish(struct pipe_screen *p_screen,
                    struct pipe_fence_handle *fence,
                    uint64_t timeout)
  {
+   struct iris_context *ice = (struct iris_context *)ctx;
     struct iris_screen *screen = (struct iris_screen *)p_screen;
  
+   /* If we created the fence with PIPE_FLUSH_DEFERRED, we may not have
+    * flushed yet.  Check if our syncobj is the current batch's signalling
+    * syncobj - if so, we haven't flushed and need to now.
+    *
+    * The Gallium docs mention that a flush will occur if \p ctx matches
+    * the context the fence was created with.  It may be NULL, so we check
+    * that it matches first.
+    */
+   if (ctx && ctx == fence->unflushed_ctx) {
+      for (unsigned i = 0; i < IRIS_BATCH_COUNT; i++) {
+         struct iris_fine_fence *fine = fence->fine[i];
+
+         if (iris_fine_fence_signaled(fine))
+            continue;
+
+         if (fine->syncobj == iris_batch_get_signal_syncobj(&ice->batches[i]))
+            iris_batch_flush(&ice->batches[i]);
+      }
+
+      /* The fence is no longer deferred. */
+      fence->unflushed_ctx = NULL;
+   }
+
     unsigned int handle_count = 0;
-   uint32_t handles[ARRAY_SIZE(fence->seqno)];
-   for (unsigned i = 0; i < ARRAY_SIZE(fence->seqno); i++) {
-      struct iris_seqno *seqno = fence->seqno[i];
+   uint32_t handles[ARRAY_SIZE(fence->fine)];
+   for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
+      struct iris_fine_fence *fine = fence->fine[i];
  
-      if (iris_seqno_signaled(seqno))
+      if (iris_fine_fence_signaled(fine))
           continue;
  
-      handles[handle_count++] = seqno->syncobj->handle;
+      handles[handle_count++] = fine->syncobj->handle;
     }
  
     if (handle_count == 0)
@@ -281,6 +352,18 @@ iris_fence_finish(struct pipe_screen *p_screen,
        .timeout_nsec = rel2abs(timeout),
        .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL
     };
+
+   if (fence->unflushed_ctx) {
+      /* This fence had a deferred flush from another context.  We can't
+       * safely flush it here, because the context might be bound to a
+       * different thread, and poking at its internals wouldn't be safe.
+       *
+       * Instead, use the WAIT_FOR_SUBMIT flag to block and hope that
+       * another thread submits the work.
+       */
+      args.flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+   }
+
     return gen_ioctl(screen->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args) == 0;
  }
  
@@ -313,14 +396,18 @@ iris_fence_get_fd(struct pipe_screen *p_screen,
     struct iris_screen *screen = (struct iris_screen *)p_screen;
     int fd = -1;
  
-   for (unsigned i = 0; i < ARRAY_SIZE(fence->seqno); i++) {
-      struct iris_seqno *seqno = fence->seqno[i];
+   /* Deferred fences aren't supported. */
+   if (fence->unflushed_ctx)
+      return -1;
+
+   for (unsigned i = 0; i < ARRAY_SIZE(fence->fine); i++) {
+      struct iris_fine_fence *fine = fence->fine[i];
  
-      if (iris_seqno_signaled(seqno))
+      if (iris_fine_fence_signaled(fine))
           continue;
  
        struct drm_syncobj_handle args = {
-         .handle = seqno->syncobj->handle,
+         .handle = fine->syncobj->handle,
           .flags = DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE,
           .fd = -1,
        };
@@ -378,8 +465,8 @@ iris_fence_create_fd(struct pipe_context *ctx,
     syncobj->handle = args.handle;
     pipe_reference_init(&syncobj->ref, 1);
  
-   struct iris_seqno *seqno = malloc(sizeof(*seqno));
-   if (!seqno) {
+   struct iris_fine_fence *fine = calloc(1, sizeof(*fine));
+   if (!fine) {
        free(syncobj);
        *out = NULL;
        return;
@@ -387,25 +474,25 @@ iris_fence_create_fd(struct pipe_context *ctx,
  
     static const uint32_t zero = 0;
  
-   /* Fences work in terms of iris_seqno, but we don't actually have a
+   /* Fences work in terms of iris_fine_fence, but we don't actually have a
      * seqno for an imported fence.  So, create a fake one which always
      * returns as 'not signaled' so we fall back to using the sync object.
      */
-   seqno->seqno = UINT32_MAX;
-   seqno->map = &zero;
-   seqno->syncobj = syncobj;
-   seqno->flags = IRIS_SEQNO_END;
-   pipe_reference_init(&seqno->reference, 1);
+   fine->seqno = UINT32_MAX;
+   fine->map = &zero;
+   fine->syncobj = syncobj;
+   fine->flags = IRIS_FENCE_END;
+   pipe_reference_init(&fine->reference, 1);
  
     struct pipe_fence_handle *fence = calloc(1, sizeof(*fence));
     if (!fence) {
-      free(seqno);
+      free(fine);
        free(syncobj);
        *out = NULL;
        return;
     }
     pipe_reference_init(&fence->ref, 1);
-   fence->seqno[0] = seqno;
+   fence->fine[0] = fine;
  
     *out = fence;
  }