broadcom/vc5: Update the UABI for in/out syncobjs
author: Eric Anholt <eric@anholt.net>
Wed, 4 Apr 2018 16:59:18 +0000 (09:59 -0700)
committer: Eric Anholt <eric@anholt.net>
Thu, 12 Apr 2018 18:20:50 +0000 (11:20 -0700)
This is the ABI I'm hoping to stabilize for merging the driver.  seqnos
are eliminated, which allows for the GPU scheduler to task-switch between
DRM fds even after submission to the kernel.  In/out sync objects are
introduced, to allow the Android fencing extension (not yet implemented,
but should be trivial), and to also allow the driver to tell the kernel to
not start a bin until a previous render is complete.

src/gallium/drivers/vc5/v3dx_simulator.c
src/gallium/drivers/vc5/vc5_bufmgr.c
src/gallium/drivers/vc5/vc5_context.c
src/gallium/drivers/vc5/vc5_context.h
src/gallium/drivers/vc5/vc5_drm.h
src/gallium/drivers/vc5/vc5_fence.c
src/gallium/drivers/vc5/vc5_job.c
src/gallium/drivers/vc5/vc5_screen.h
src/gallium/drivers/vc5/vc5_simulator.c

index aed4aab204e010f46ebdba3be39ab3f35caa89f5..90fafaee1e2337824528670d8ada3f6e5ce9ffe9 100644 (file)
@@ -103,7 +103,7 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                                 struct drm_vc5_get_param *args)
 {
         static const uint32_t reg_map[] = {
-                [DRM_VC5_PARAM_V3D_HUB_UIFCFG] = V3D_HUB_CTL_UIFCFG,
+                [DRM_VC5_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
                 [DRM_VC5_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
                 [DRM_VC5_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
                 [DRM_VC5_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
index bced512ae0d03bda6fcc6591d3e33207bf5707f8..7a9c04a268d6da4529ecb804ce0e259209e7499c 100644 (file)
@@ -451,44 +451,6 @@ vc5_bo_flink(struct vc5_bo *bo, uint32_t *name)
         return true;
 }
 
-static int vc5_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns)
-{
-        struct drm_vc5_wait_seqno wait = {
-                .seqno = seqno,
-                .timeout_ns = timeout_ns,
-        };
-        int ret = vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_SEQNO, &wait);
-        if (ret == -1)
-                return -errno;
-        else
-                return 0;
-
-}
-
-bool
-vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns,
-               const char *reason)
-{
-        if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) {
-                if (vc5_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) {
-                        fprintf(stderr, "Blocking on seqno %lld for %s\n",
-                                (long long)seqno, reason);
-                }
-        }
-
-        int ret = vc5_wait_seqno_ioctl(screen->fd, seqno, timeout_ns);
-        if (ret) {
-                if (ret != -ETIME) {
-                        fprintf(stderr, "wait failed: %d\n", ret);
-                        abort();
-                }
-
-                return false;
-        }
-
-        return true;
-}
-
 static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns)
 {
         struct drm_vc5_wait_bo wait = {
index 9403f8ffdd45ede25ef375da9a766d4d1935f35d..b6d1234879b729e7ce301325cc9aac8b4756ee1b 100644 (file)
@@ -60,8 +60,7 @@ vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
 
         if (fence) {
                 struct pipe_screen *screen = pctx->screen;
-                struct vc5_fence *f = vc5_fence_create(vc5->screen,
-                                                       vc5->last_emit_seqno);
+                struct vc5_fence *f = vc5_fence_create(vc5);
                 screen->fence_reference(screen, fence, NULL);
                 *fence = (struct pipe_fence_handle *)f;
         }
@@ -128,6 +127,13 @@ vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 
         vc5->screen = screen;
 
+        int ret = drmSyncobjCreate(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
+                                   &vc5->out_sync);
+        if (ret) {
+                ralloc_free(vc5);
+                return NULL;
+        }
+
         pctx->screen = pscreen;
         pctx->priv = priv;
         pctx->destroy = vc5_context_destroy;
index f61c37ba9242a2a6a41bf94884c85fa991aebb0b..e08a2a59909ee381801ce6552adad48f7447545b 100644 (file)
@@ -367,8 +367,8 @@ struct vc5_context {
         /** Maximum index buffer valid for the current shader_rec. */
         uint32_t max_index;
 
-        /** Seqno of the last CL flush's job. */
-        uint64_t last_emit_seqno;
+        /** Sync object that our RCL will update as its out_sync. */
+        uint32_t out_sync;
 
         struct u_upload_mgr *uploader;
 
@@ -549,6 +549,8 @@ void vc5_init_query_functions(struct vc5_context *vc5);
 void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info);
 void vc5_blitter_save(struct vc5_context *vc5);
 
+struct vc5_fence *vc5_fence_create(struct vc5_context *vc5);
+
 #ifdef v3dX
 #  include "v3dx_context.h"
 #else
index cd7e43ad47fb67f12725387daae3e97baeb7b9c1..184863d206a66f2d9e6a57b1cc45a3008a8c1fb6 100644 (file)
@@ -31,15 +31,13 @@ extern "C" {
 #endif
 
 #define DRM_VC5_SUBMIT_CL                         0x00
-#define DRM_VC5_WAIT_SEQNO                        0x01
-#define DRM_VC5_WAIT_BO                           0x02
-#define DRM_VC5_CREATE_BO                         0x03
-#define DRM_VC5_MMAP_BO                           0x04
-#define DRM_VC5_GET_PARAM                         0x05
-#define DRM_VC5_GET_BO_OFFSET                     0x06
+#define DRM_VC5_WAIT_BO                           0x01
+#define DRM_VC5_CREATE_BO                         0x02
+#define DRM_VC5_MMAP_BO                           0x03
+#define DRM_VC5_GET_PARAM                         0x04
+#define DRM_VC5_GET_BO_OFFSET                     0x05
 
 #define DRM_IOCTL_VC5_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_SUBMIT_CL, struct drm_vc5_submit_cl)
-#define DRM_IOCTL_VC5_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_SEQNO, struct drm_vc5_wait_seqno)
 #define DRM_IOCTL_VC5_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_BO, struct drm_vc5_wait_bo)
 #define DRM_IOCTL_VC5_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_CREATE_BO, struct drm_vc5_create_bo)
 #define DRM_IOCTL_VC5_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_MMAP_BO, struct drm_vc5_mmap_bo)
@@ -77,6 +75,13 @@ struct drm_vc5_submit_cl {
         /** End address of the RCL (first byte after the RCL) */
        __u32 rcl_end;
 
+       /** An optional sync object to wait on before starting the BCL. */
+       __u32 in_sync_bcl;
+       /** An optional sync object to wait on before starting the RCL. */
+       __u32 in_sync_rcl;
+       /** An optional sync object to place the completion fence in. */
+       __u32 out_sync;
+
        /* Offset of the tile alloc memory
         *
         * This is optional on V3D 3.3 (where the CL can set the value) but
@@ -84,39 +89,18 @@ struct drm_vc5_submit_cl {
         */
        __u32 qma;
 
-        /** Size of the tile alloc memory. */
+       /** Size of the tile alloc memory. */
        __u32 qms;
 
-        /** Offset of the tile state data array. */
+       /** Offset of the tile state data array. */
        __u32 qts;
 
        /* Pointer to a u32 array of the BOs that are referenced by the job.
         */
        __u64 bo_handles;
 
-       /* Pointer to an array of chunks of extra submit CL information. (the
-        * chunk struct is not yet defined)
-        */
-       __u64 chunks;
-
        /* Number of BO handles passed in (size is that times 4). */
        __u32 bo_handle_count;
-
-       __u32 chunk_count;
-
-       __u64 flags;
-};
-
-/**
- * struct drm_vc5_wait_seqno - ioctl argument for waiting for
- * DRM_VC5_SUBMIT_CL completion using its returned seqno.
- *
- * timeout_ns is the timeout in nanoseconds, where "0" means "don't
- * block, just return the status."
- */
-struct drm_vc5_wait_seqno {
-       __u64 seqno;
-       __u64 timeout_ns;
 };
 
 /**
@@ -148,6 +132,9 @@ struct drm_vc5_create_bo {
         * Returned offset for the BO in the V3D address space.  This offset
         * is private to the DRM fd and is valid for the lifetime of the GEM
         * handle.
+        *
+        * This offset value will always be nonzero, since various HW
+        * units treat 0 specially.
         */
        __u32 offset;
 };
@@ -172,13 +159,13 @@ struct drm_vc5_mmap_bo {
 };
 
 enum drm_vc5_param {
-        DRM_VC5_PARAM_V3D_HUB_UIFCFG,
-        DRM_VC5_PARAM_V3D_HUB_IDENT1,
-        DRM_VC5_PARAM_V3D_HUB_IDENT2,
-        DRM_VC5_PARAM_V3D_HUB_IDENT3,
-        DRM_VC5_PARAM_V3D_CORE0_IDENT0,
-        DRM_VC5_PARAM_V3D_CORE0_IDENT1,
-        DRM_VC5_PARAM_V3D_CORE0_IDENT2,
+       DRM_VC5_PARAM_V3D_UIFCFG,
+       DRM_VC5_PARAM_V3D_HUB_IDENT1,
+       DRM_VC5_PARAM_V3D_HUB_IDENT2,
+       DRM_VC5_PARAM_V3D_HUB_IDENT3,
+       DRM_VC5_PARAM_V3D_CORE0_IDENT0,
+       DRM_VC5_PARAM_V3D_CORE0_IDENT1,
+       DRM_VC5_PARAM_V3D_CORE0_IDENT2,
 };
 
 struct drm_vc5_get_param {
index 08de9bca5a1424d78a8bdd40f3ff8ded0bfb5a0c..731dd6db9086abf988b1a2d1e1eb2eb1a96e6b8e 100644 (file)
 
 #include "util/u_inlines.h"
 
-#include "vc5_screen.h"
+#include "vc5_context.h"
 #include "vc5_bufmgr.h"
 
 struct vc5_fence {
         struct pipe_reference reference;
-        uint64_t seqno;
+        uint32_t sync;
 };
 
 static void
@@ -49,11 +49,13 @@ vc5_fence_reference(struct pipe_screen *pscreen,
                     struct pipe_fence_handle **pp,
                     struct pipe_fence_handle *pf)
 {
+        struct vc5_screen *screen = vc5_screen(pscreen);
         struct vc5_fence **p = (struct vc5_fence **)pp;
         struct vc5_fence *f = (struct vc5_fence *)pf;
         struct vc5_fence *old = *p;
 
         if (pipe_reference(&(*p)->reference, &f->reference)) {
+                drmSyncobjDestroy(screen->fd, old->sync);
                 free(old);
         }
         *p = f;
@@ -68,19 +70,28 @@ vc5_fence_finish(struct pipe_screen *pscreen,
         struct vc5_screen *screen = vc5_screen(pscreen);
         struct vc5_fence *f = (struct vc5_fence *)pf;
 
-        return vc5_wait_seqno(screen, f->seqno, timeout_ns, "fence wait");
+        return drmSyncobjWait(screen->fd, &f->sync, 1, timeout_ns, 0, NULL);
 }
 
 struct vc5_fence *
-vc5_fence_create(struct vc5_screen *screen, uint64_t seqno)
+vc5_fence_create(struct vc5_context *vc5)
 {
         struct vc5_fence *f = calloc(1, sizeof(*f));
-
         if (!f)
                 return NULL;
 
+        uint32_t new_sync;
+        /* Make a new sync object for the context. */
+        int ret = drmSyncobjCreate(vc5->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
+                                   &new_sync);
+        if (ret) {
+                free(f);
+                return NULL;
+        }
+
         pipe_reference_init(&f->reference, 1);
-        f->seqno = seqno;
+        f->sync = vc5->out_sync;
+        vc5->out_sync = new_sync;
 
         return f;
 }
index 3f70539a1fbfb3acb6c66c51146e98fa2167607f..12dd75766a99acbff94a8fb5e253d0835a0d5168 100644 (file)
@@ -389,6 +389,7 @@ vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job)
                         v3d33_bcl_epilogue(vc5, job);
         }
 
+        job->submit.out_sync = vc5->out_sync;
         job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
         job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);
 
index 05a770ba2a18e2fe8e548ee71e3e64cb7904a7f2..9a7c11a63b8ee02e5afb0e9816e0d2b2ea7679a4 100644 (file)
@@ -98,7 +98,4 @@ struct pipe_screen *vc5_screen_create(int fd);
 void
 vc5_fence_init(struct vc5_screen *screen);
 
-struct vc5_fence *
-vc5_fence_create(struct vc5_screen *screen, uint64_t seqno);
-
 #endif /* VC5_SCREEN_H */
index 93e8d44e30ecc5632fae05c92bd902be76c74c66..d677293f3eddfed67e34ec103684d5d2e39076fd 100644 (file)
@@ -543,7 +543,6 @@ vc5_simulator_ioctl(int fd, unsigned long request, void *args)
                 return vc5_simulator_mmap_bo_ioctl(fd, args);
 
         case DRM_IOCTL_VC5_WAIT_BO:
-        case DRM_IOCTL_VC5_WAIT_SEQNO:
                 /* We do all of the vc5 rendering synchronously, so we just
                  * return immediately on the wait ioctls.  This ignores any
                  * native rendering to the host BO, so it does mean we race on