vc4: Update for new kernel ABI with async execution and waits.
author Eric Anholt <eric@anholt.net>
Thu, 20 Nov 2014 01:39:04 +0000 (17:39 -0800)
committer Eric Anholt <eric@anholt.net>
Thu, 20 Nov 2014 21:07:07 +0000 (13:07 -0800)
Our submits now return immediately and you have to manually wait for
things to complete if you want to (like a normal driver).

src/gallium/drivers/vc4/Makefile.sources
src/gallium/drivers/vc4/vc4_bufmgr.c
src/gallium/drivers/vc4/vc4_bufmgr.h
src/gallium/drivers/vc4/vc4_context.c
src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_drm.h
src/gallium/drivers/vc4/vc4_fence.c [new file with mode: 0644]
src/gallium/drivers/vc4/vc4_resource.c
src/gallium/drivers/vc4/vc4_screen.h

index 23365659c3a7d3d5ef2e1eaa129e92a34b2c1465..6ec48ab36be78631d8a1a1df6d2f011d9d8043ed 100644 (file)
@@ -9,6 +9,7 @@ C_SOURCES := \
        vc4_draw.c \
        vc4_drm.h \
        vc4_emit.c \
+       vc4_fence.c \
        vc4_formats.c \
        vc4_opt_algebraic.c \
        vc4_opt_copy_propagation.c \
index 33592e84527022e986a9f41bd1d59493257b4729..3b73ac80bf6634ad3d08e44c7e81136bc6e9caf7 100644 (file)
@@ -152,8 +152,57 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
         return true;
 }
 
+/**
+ * Waits for the render job identified by seqno to complete.
+ *
+ * @param screen     screen whose DRM fd the wait ioctl is issued on
+ * @param seqno      seqno to wait for
+ * @param timeout_ns timeout handed to the kernel, in nanoseconds
+ *
+ * @return true if the wait completed, false if it timed out.  Any other
+ * ioctl failure aborts the process.  On success the seqno is recorded in
+ * screen->finished_seqno.  Builds with USE_VC4_SIMULATOR always return true.
+ */
+bool
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns)
+{
+#ifndef USE_VC4_SIMULATOR
+        struct drm_vc4_wait_seqno wait;
+        memset(&wait, 0, sizeof(wait));
+        wait.seqno = seqno;
+        wait.timeout_ns = timeout_ns;
+
+        int ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
+        if (ret == 0) {
+                screen->finished_seqno = wait.seqno;
+                return true;
+        }
+
+        /* drmIoctl() reports failure as -1 with the error code in errno, so
+         * a timeout has to be detected through errno rather than by comparing
+         * the return value against -ETIME.
+         */
+        if (errno == ETIME)
+                return false;
+
+        fprintf(stderr, "wait failed\n");
+        abort();
+#else
+        return true;
+#endif
+}
+
+/**
+ * Waits on a BO through the DRM_IOCTL_VC4_WAIT_BO ioctl.
+ *
+ * @param bo         buffer object to wait on
+ * @param timeout_ns timeout handed to the kernel, in nanoseconds
+ *
+ * @return true if the wait completed, false if it timed out.  Any other
+ * ioctl failure aborts the process.  Builds with USE_VC4_SIMULATOR always
+ * return true.
+ */
+bool
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns)
+{
+#ifndef USE_VC4_SIMULATOR
+        struct vc4_screen *screen = bo->screen;
+
+        struct drm_vc4_wait_bo wait;
+        memset(&wait, 0, sizeof(wait));
+        wait.handle = bo->handle;
+        wait.timeout_ns = timeout_ns;
+
+        int ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
+        if (ret == 0)
+                return true;
+
+        /* drmIoctl() reports failure as -1 with the error code in errno, so
+         * a timeout has to be detected through errno rather than by comparing
+         * the return value against -ETIME.
+         */
+        if (errno == ETIME)
+                return false;
+
+        fprintf(stderr, "wait failed\n");
+        abort();
+#else
+        return true;
+#endif
+}
+
 void *
-vc4_bo_map(struct vc4_bo *bo)
+vc4_bo_map_unsynchronized(struct vc4_bo *bo)
 {
         int ret;
 
@@ -179,3 +228,17 @@ vc4_bo_map(struct vc4_bo *bo)
 
         return bo->map;
 }
+
+/**
+ * Synchronized variant of vc4_bo_map_unsynchronized().
+ *
+ * Maps the BO, then waits on it with an infinite timeout before handing the
+ * mapping back.  Aborts the process if the wait reports failure.
+ */
+void *
+vc4_bo_map(struct vc4_bo *bo)
+{
+        void *ptr = vc4_bo_map_unsynchronized(bo);
+
+        if (!vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE)) {
+                fprintf(stderr, "BO wait for map failed\n");
+                abort();
+        }
+
+        return ptr;
+}
index 00ea149bd5f888b4ee2b3b413a0abd77a90cab5f..4a1d4a4ef0d670ff71255123b581781970cb8374 100644 (file)
@@ -78,9 +78,17 @@ vc4_bo_unreference(struct vc4_bo **bo)
         *bo = NULL;
 }
 
-
 void *
 vc4_bo_map(struct vc4_bo *bo);
 
+void *
+vc4_bo_map_unsynchronized(struct vc4_bo *bo);
+
+bool
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns);
+
+bool
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns);
+
 #endif /* VC4_BUFMGR_H */
 
index a6becaf73fccd2f3c5cfdda04d3a0f48ac75bab5..bb30c0e8ca785c11edd48daacfc99abc62e1323e 100644 (file)
@@ -322,6 +322,8 @@ vc4_flush(struct pipe_context *pctx)
                 }
         }
 
+        vc4->last_emit_seqno = submit.seqno;
+
         vc4_reset_cl(&vc4->bcl);
         vc4_reset_cl(&vc4->rcl);
         vc4_reset_cl(&vc4->shader_rec);
@@ -350,7 +352,15 @@ static void
 vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
                unsigned flags)
 {
+        struct vc4_context *vc4 = vc4_context(pctx);
+
         vc4_flush(pctx);
+
+        if (fence) {
+                struct vc4_fence *f = vc4_fence_create(vc4->screen,
+                                                       vc4->last_emit_seqno);
+                *fence = (struct pipe_fence_handle *)f;
+        }
 }
 
 /**
index 6a82d8fe5a4f255a220f7f43bf350e89f363fb88..207a7b4e67256cf6bd2e95dfa7807f0989385a1f 100644 (file)
@@ -219,6 +219,9 @@ struct vc4_context {
 
         uint8_t prim_mode;
 
+        /** Seqno of the last CL flush's job. */
+        uint64_t last_emit_seqno;
+
         /** @{ Current pipeline state objects */
         struct pipe_scissor_state scissor;
         struct pipe_blend_state *blend;
index 7d440742191e92d3de445803006796b03fb2b33a..34f5a88fa01e855ef3fe93e33ff5c4691cb4c252 100644 (file)
 #include <drm.h>
 
 #define DRM_VC4_SUBMIT_CL                         0x00
+#define DRM_VC4_WAIT_SEQNO                        0x01
+#define DRM_VC4_WAIT_BO                           0x02
 
 #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
+#define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
+#define DRM_IOCTL_VC4_WAIT_BO             DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo)
+
 
 /**
  * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D
@@ -109,6 +114,39 @@ struct drm_vc4_submit_cl {
 
        /* Number of BO handles passed in (size is that times 4). */
        uint32_t bo_handle_count;
+
+       uint32_t pad;
+
+       /* Returned value of the seqno of this render job (for the
+        * wait ioctl).
+        */
+       uint64_t seqno;
+};
+
+/**
+ * struct drm_vc4_wait_seqno - ioctl argument for waiting for
+ * DRM_VC4_SUBMIT_CL completion using its returned seqno.
+ *
+ * seqno is the value to wait on, as returned in the seqno field of
+ * struct drm_vc4_submit_cl.
+ *
+ * timeout_ns is the timeout in nanoseconds, where "0" means "don't
+ * block, just return the status."
+ */
+struct drm_vc4_wait_seqno {
+       uint64_t seqno;
+       uint64_t timeout_ns;
+};
+
+/**
+ * struct drm_vc4_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_VC4_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ *
+ * handle is the handle of the BO to wait on.
+ *
+ * timeout_ns is the wait timeout in nanoseconds (presumably with the
+ * same "0 means don't block" meaning as in struct drm_vc4_wait_seqno;
+ * TODO confirm against the kernel side).
+ *
+ * pad is not set to anything but zero by the visible userspace code; it
+ * presumably exists to keep timeout_ns 64-bit aligned (TODO confirm).
+ */
+struct drm_vc4_wait_bo {
+       uint32_t handle;
+       uint32_t pad;
+       uint64_t timeout_ns;
 };
 
 #endif /* _UAPI_VC4_DRM_H_ */
diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c
new file mode 100644 (file)
index 0000000..c081d51
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2014 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file vc4_fence.c
+ *
+ * Seqno-based fence management.
+ *
+ * We have two mechanisms for waiting in our kernel API: You can wait on a BO
+ * to have all rendering to it from any process completed, or wait on a
+ * seqno for that particular seqno to be passed.  The fence API we're
+ * implementing is based on waiting for all rendering in the context to have
+ * completed (with no reference to what other processes might be doing with
+ * the same BOs), so we can just use the seqno of the last rendering we'd
+ * fired off as our fence marker.
+ */
+
+#include "util/u_inlines.h"
+
+#include "vc4_screen.h"
+#include "vc4_bufmgr.h"
+
+struct vc4_fence {
+        struct pipe_reference reference; /* refcount; set to 1 by vc4_fence_create() */
+        uint64_t seqno; /* seqno that signalled/finish compare against and wait on */
+};
+
+/**
+ * pipe_screen::fence_reference hook: makes *pp refer to pf.
+ *
+ * Reference counts are adjusted through pipe_reference(); when it reports
+ * that the fence previously held in *pp lost its last reference, that fence
+ * is freed.
+ *
+ * @param pscreen unused
+ * @param pp      slot holding the currently referenced fence
+ * @param pf      fence the slot should reference from now on
+ */
+static void
+vc4_fence_reference(struct pipe_screen *pscreen,
+                    struct pipe_fence_handle **pp,
+                    struct pipe_fence_handle *pf)
+{
+        struct vc4_fence **p = (struct vc4_fence **)pp;
+        struct vc4_fence *f = (struct vc4_fence *)pf;
+        struct vc4_fence *old = *p;
+
+        if (pipe_reference(&(*p)->reference, &f->reference)) {
+                free(old);
+        }
+
+        /* Store the new fence in the caller's slot; without this the slot
+         * keeps pointing at the old (possibly just freed) fence.
+         */
+        *p = f;
+}
+
+/**
+ * pipe_screen::fence_signalled hook.
+ *
+ * Answers without a syscall when the screen's cached finished_seqno has
+ * already reached the fence's seqno; otherwise asks vc4_wait_seqno() with a
+ * timeout of 0.
+ */
+static boolean
+vc4_fence_signalled(struct pipe_screen *pscreen,
+                    struct pipe_fence_handle *pf)
+{
+        struct vc4_fence *fence = (struct vc4_fence *)pf;
+        struct vc4_screen *vc4s = vc4_screen(pscreen);
+
+        if (fence->seqno > vc4s->finished_seqno)
+                return vc4_wait_seqno(vc4s, fence->seqno, 0);
+
+        return true;
+}
+
+/**
+ * pipe_screen::fence_finish hook.
+ *
+ * Answers without a syscall when the screen's cached finished_seqno has
+ * already reached the fence's seqno; otherwise waits on the seqno through
+ * vc4_wait_seqno() with the caller's timeout_ns.
+ */
+static boolean
+vc4_fence_finish(struct pipe_screen *pscreen,
+                 struct pipe_fence_handle *pf,
+                 uint64_t timeout_ns)
+{
+        struct vc4_fence *fence = (struct vc4_fence *)pf;
+        struct vc4_screen *vc4s = vc4_screen(pscreen);
+
+        if (fence->seqno > vc4s->finished_seqno)
+                return vc4_wait_seqno(vc4s, fence->seqno, timeout_ns);
+
+        return true;
+}
+
+/**
+ * Allocates a fence for the given seqno, holding a single reference.
+ *
+ * @param screen unused
+ * @param seqno  seqno stored in the new fence
+ *
+ * @return the new fence, or NULL if allocation failed.
+ */
+struct vc4_fence *
+vc4_fence_create(struct vc4_screen *screen, uint64_t seqno)
+{
+        struct vc4_fence *fence = calloc(1, sizeof(*fence));
+
+        if (fence) {
+                pipe_reference_init(&fence->reference, 1);
+                fence->seqno = seqno;
+        }
+
+        return fence;
+}
+
+/**
+ * Installs this file's fence_reference, fence_signalled and fence_finish
+ * implementations on the screen's base object.
+ */
+void
+vc4_fence_init(struct vc4_screen *screen)
+{
+        struct pipe_screen *pscreen = &screen->base;
+
+        pscreen->fence_finish = vc4_fence_finish;
+        pscreen->fence_signalled = vc4_fence_signalled;
+        pscreen->fence_reference = vc4_fence_reference;
+}
index b02e2899329aad929b8f8bdff2c9c43c09758872..a00ce71049b8ee1cb10ca6479f99d6fad9489f31 100644 (file)
@@ -126,7 +126,10 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
          * need to do syncing stuff here yet.
          */
 
-        buf = vc4_bo_map(rsc->bo);
+        if (usage & PIPE_TRANSFER_UNSYNCHRONIZED)
+                buf = vc4_bo_map_unsynchronized(rsc->bo);
+        else
+                buf = vc4_bo_map(rsc->bo);
         if (!buf) {
                 fprintf(stderr, "Failed to map bo\n");
                 goto fail;
index 470cb06e2bf436e81e944c8416e39de2b43411f6..ba07490fc9462456b72dc8c76c5dd50939573c5f 100644 (file)
@@ -47,6 +47,13 @@ struct vc4_screen {
 
         void *simulator_mem_base;
         uint32_t simulator_mem_size;
+
+        /** The last seqno we've completed a wait for.
+         *
+         * This lets us slightly optimize our waits by skipping wait syscalls
+         * if we know the job's already done.
+         */
+        uint64_t finished_seqno;
 };
 
 static inline struct vc4_screen *
@@ -67,4 +74,10 @@ vc4_screen_bo_from_handle(struct pipe_screen *pscreen,
 
 extern uint32_t vc4_debug;
 
+void
+vc4_fence_init(struct vc4_screen *screen);
+
+struct vc4_fence *
+vc4_fence_create(struct vc4_screen *screen, uint64_t seqno);
+
 #endif /* VC4_SCREEN_H */