From b225cdceccb225329298763baa302a9332288b18 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 Apr 2018 09:59:18 -0700 Subject: [PATCH] broadcom/vc5: Update the UABI for in/out syncobjs This is the ABI I'm hoping to stabilize for merging the driver. seqnos are eliminated, which allows for the GPU scheduler to task-switch between DRM fds even after submission to the kernel. In/out sync objects are introduced, to allow the Android fencing extension (not yet implemented, but should be trivial), and to also allow the driver to tell the kernel to not start a bin until a previous render is complete. --- src/gallium/drivers/vc5/v3dx_simulator.c | 2 +- src/gallium/drivers/vc5/vc5_bufmgr.c | 38 --------------- src/gallium/drivers/vc5/vc5_context.c | 10 +++- src/gallium/drivers/vc5/vc5_context.h | 6 ++- src/gallium/drivers/vc5/vc5_drm.h | 61 ++++++++++-------------- src/gallium/drivers/vc5/vc5_fence.c | 23 ++++++--- src/gallium/drivers/vc5/vc5_job.c | 1 + src/gallium/drivers/vc5/vc5_screen.h | 3 -- src/gallium/drivers/vc5/vc5_simulator.c | 1 - 9 files changed, 55 insertions(+), 90 deletions(-) diff --git a/src/gallium/drivers/vc5/v3dx_simulator.c b/src/gallium/drivers/vc5/v3dx_simulator.c index aed4aab204e..90fafaee1e2 100644 --- a/src/gallium/drivers/vc5/v3dx_simulator.c +++ b/src/gallium/drivers/vc5/v3dx_simulator.c @@ -103,7 +103,7 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, struct drm_vc5_get_param *args) { static const uint32_t reg_map[] = { - [DRM_VC5_PARAM_V3D_HUB_UIFCFG] = V3D_HUB_CTL_UIFCFG, + [DRM_VC5_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG, [DRM_VC5_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1, [DRM_VC5_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2, [DRM_VC5_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3, diff --git a/src/gallium/drivers/vc5/vc5_bufmgr.c b/src/gallium/drivers/vc5/vc5_bufmgr.c index bced512ae0d..7a9c04a268d 100644 --- a/src/gallium/drivers/vc5/vc5_bufmgr.c +++ b/src/gallium/drivers/vc5/vc5_bufmgr.c @@ -451,44 +451,6 @@ vc5_bo_flink(struct vc5_bo *bo, uint32_t *name) return true; } -static int vc5_wait_seqno_ioctl(int fd, uint64_t seqno, uint64_t timeout_ns) -{ - struct drm_vc5_wait_seqno wait = { - .seqno = seqno, - .timeout_ns = timeout_ns, - }; - int ret = vc5_ioctl(fd, DRM_IOCTL_VC5_WAIT_SEQNO, &wait); - if (ret == -1) - return -errno; - else - return 0; - -} - -bool -vc5_wait_seqno(struct vc5_screen *screen, uint64_t seqno, uint64_t timeout_ns, - const char *reason) -{ - if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF) && timeout_ns && reason) { - if (vc5_wait_seqno_ioctl(screen->fd, seqno, 0) == -ETIME) { - fprintf(stderr, "Blocking on seqno %lld for %s\n", - (long long)seqno, reason); - } - } - - int ret = vc5_wait_seqno_ioctl(screen->fd, seqno, timeout_ns); - if (ret) { - if (ret != -ETIME) { - fprintf(stderr, "wait failed: %d\n", ret); - abort(); - } - - return false; - } - - return true; -} - static int vc5_wait_bo_ioctl(int fd, uint32_t handle, uint64_t timeout_ns) { struct drm_vc5_wait_bo wait = { diff --git a/src/gallium/drivers/vc5/vc5_context.c b/src/gallium/drivers/vc5/vc5_context.c index 9403f8ffdd4..b6d1234879b 100644 --- a/src/gallium/drivers/vc5/vc5_context.c +++ b/src/gallium/drivers/vc5/vc5_context.c @@ -60,8 +60,7 @@ vc5_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, if (fence) { struct pipe_screen *screen = pctx->screen; - struct vc5_fence *f = vc5_fence_create(vc5->screen, - vc5->last_emit_seqno); + struct vc5_fence *f = vc5_fence_create(vc5); screen->fence_reference(screen, fence, NULL); *fence = (struct pipe_fence_handle *)f; } @@ -128,6 +127,13 @@ vc5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) vc5->screen = screen; + int ret = drmSyncobjCreate(screen->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &vc5->out_sync); + if (ret) { + ralloc_free(vc5); + return NULL; + } + pctx->screen = pscreen; pctx->priv = priv; pctx->destroy = vc5_context_destroy; diff --git a/src/gallium/drivers/vc5/vc5_context.h b/src/gallium/drivers/vc5/vc5_context.h index f61c37ba924..e08a2a59909 100644 --- a/src/gallium/drivers/vc5/vc5_context.h +++ b/src/gallium/drivers/vc5/vc5_context.h @@ -367,8 +367,8 @@ struct vc5_context { /** Maximum index buffer valid for the current shader_rec. */ uint32_t max_index; - /** Seqno of the last CL flush's job. */ - uint64_t last_emit_seqno; + /** Sync object that our RCL will update as its out_sync. */ + uint32_t out_sync; struct u_upload_mgr *uploader; @@ -549,6 +549,8 @@ void vc5_init_query_functions(struct vc5_context *vc5); void vc5_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info); void vc5_blitter_save(struct vc5_context *vc5); +struct vc5_fence *vc5_fence_create(struct vc5_context *vc5); + #ifdef v3dX # include "v3dx_context.h" #else diff --git a/src/gallium/drivers/vc5/vc5_drm.h b/src/gallium/drivers/vc5/vc5_drm.h index cd7e43ad47f..184863d206a 100644 --- a/src/gallium/drivers/vc5/vc5_drm.h +++ b/src/gallium/drivers/vc5/vc5_drm.h @@ -31,15 +31,13 @@ extern "C" { #endif #define DRM_VC5_SUBMIT_CL 0x00 -#define DRM_VC5_WAIT_SEQNO 0x01 -#define DRM_VC5_WAIT_BO 0x02 -#define DRM_VC5_CREATE_BO 0x03 -#define DRM_VC5_MMAP_BO 0x04 -#define DRM_VC5_GET_PARAM 0x05 -#define DRM_VC5_GET_BO_OFFSET 0x06 +#define DRM_VC5_WAIT_BO 0x01 +#define DRM_VC5_CREATE_BO 0x02 +#define DRM_VC5_MMAP_BO 0x03 +#define DRM_VC5_GET_PARAM 0x04 +#define DRM_VC5_GET_BO_OFFSET 0x05 #define DRM_IOCTL_VC5_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_SUBMIT_CL, struct drm_vc5_submit_cl) -#define DRM_IOCTL_VC5_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_SEQNO, struct drm_vc5_wait_seqno) #define DRM_IOCTL_VC5_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_WAIT_BO, struct drm_vc5_wait_bo) #define DRM_IOCTL_VC5_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_CREATE_BO, struct drm_vc5_create_bo) #define DRM_IOCTL_VC5_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC5_MMAP_BO, struct drm_vc5_mmap_bo) @@ -77,6 +75,13 @@ struct drm_vc5_submit_cl { /** End address of the RCL (first byte after the RCL) */ __u32 rcl_end; + /** An optional sync object to wait on before starting the BCL. */ + __u32 in_sync_bcl; + /** An optional sync object to wait on before starting the RCL. */ + __u32 in_sync_rcl; + /** An optional sync object to place the completion fence in. */ + __u32 out_sync; + /* Offset of the tile alloc memory * * This is optional on V3D 3.3 (where the CL can set the value) but @@ -84,39 +89,18 @@ struct drm_vc5_submit_cl { */ __u32 qma; - /** Size of the tile alloc memory. */ + /** Size of the tile alloc memory. */ __u32 qms; - /** Offset of the tile state data array. */ + /** Offset of the tile state data array. */ __u32 qts; /* Pointer to a u32 array of the BOs that are referenced by the job. */ __u64 bo_handles; - /* Pointer to an array of chunks of extra submit CL information. (the - * chunk struct is not yet defined) - */ - __u64 chunks; - /* Number of BO handles passed in (size is that times 4). */ __u32 bo_handle_count; - - __u32 chunk_count; - - __u64 flags; -}; - -/** - * struct drm_vc5_wait_seqno - ioctl argument for waiting for - * DRM_VC5_SUBMIT_CL completion using its returned seqno. - * - * timeout_ns is the timeout in nanoseconds, where "0" means "don't - * block, just return the status." - */ -struct drm_vc5_wait_seqno { - __u64 seqno; - __u64 timeout_ns; }; /** @@ -148,6 +132,9 @@ struct drm_vc5_create_bo { * Returned offset for the BO in the V3D address space. This offset * is private to the DRM fd and is valid for the lifetime of the GEM * handle. + * + * This offset value will always be nonzero, since various HW + * units treat 0 specially. */ __u32 offset; }; @@ -172,13 +159,13 @@ struct drm_vc5_mmap_bo { }; enum drm_vc5_param { - DRM_VC5_PARAM_V3D_HUB_UIFCFG, - DRM_VC5_PARAM_V3D_HUB_IDENT1, - DRM_VC5_PARAM_V3D_HUB_IDENT2, - DRM_VC5_PARAM_V3D_HUB_IDENT3, - DRM_VC5_PARAM_V3D_CORE0_IDENT0, - DRM_VC5_PARAM_V3D_CORE0_IDENT1, - DRM_VC5_PARAM_V3D_CORE0_IDENT2, + DRM_VC5_PARAM_V3D_UIFCFG, + DRM_VC5_PARAM_V3D_HUB_IDENT1, + DRM_VC5_PARAM_V3D_HUB_IDENT2, + DRM_VC5_PARAM_V3D_HUB_IDENT3, + DRM_VC5_PARAM_V3D_CORE0_IDENT0, + DRM_VC5_PARAM_V3D_CORE0_IDENT1, + DRM_VC5_PARAM_V3D_CORE0_IDENT2, }; struct drm_vc5_get_param { diff --git a/src/gallium/drivers/vc5/vc5_fence.c b/src/gallium/drivers/vc5/vc5_fence.c index 08de9bca5a1..731dd6db908 100644 --- a/src/gallium/drivers/vc5/vc5_fence.c +++ b/src/gallium/drivers/vc5/vc5_fence.c @@ -36,12 +36,12 @@ #include "util/u_inlines.h" -#include "vc5_screen.h" +#include "vc5_context.h" #include "vc5_bufmgr.h" struct vc5_fence { struct pipe_reference reference; - uint64_t seqno; + uint32_t sync; }; static void @@ -49,11 +49,13 @@ vc5_fence_reference(struct pipe_screen *pscreen, struct pipe_fence_handle **pp, struct pipe_fence_handle *pf) { + struct vc5_screen *screen = vc5_screen(pscreen); struct vc5_fence **p = (struct vc5_fence **)pp; struct vc5_fence *f = (struct vc5_fence *)pf; struct vc5_fence *old = *p; if (pipe_reference(&(*p)->reference, &f->reference)) { + drmSyncobjDestroy(screen->fd, old->sync); free(old); } *p = f; @@ -68,19 +70,28 @@ vc5_fence_finish(struct pipe_screen *pscreen, struct vc5_screen *screen = vc5_screen(pscreen); struct vc5_fence *f = (struct vc5_fence *)pf; - return vc5_wait_seqno(screen, f->seqno, timeout_ns, "fence wait"); + return drmSyncobjWait(screen->fd, &f->sync, 1, timeout_ns, 0, NULL); } struct vc5_fence * -vc5_fence_create(struct vc5_screen *screen, uint64_t seqno) +vc5_fence_create(struct vc5_context *vc5) { struct vc5_fence *f = calloc(1, sizeof(*f)); - if (!f) return NULL; + uint32_t new_sync; + /* Make a new sync object for the context. */ + int ret = drmSyncobjCreate(vc5->fd, DRM_SYNCOBJ_CREATE_SIGNALED, + &new_sync); + if (ret) { + free(f); + return NULL; + } + pipe_reference_init(&f->reference, 1); - f->seqno = seqno; + f->sync = vc5->out_sync; + vc5->out_sync = new_sync; return f; } diff --git a/src/gallium/drivers/vc5/vc5_job.c b/src/gallium/drivers/vc5/vc5_job.c index 3f70539a1fb..12dd75766a9 100644 --- a/src/gallium/drivers/vc5/vc5_job.c +++ b/src/gallium/drivers/vc5/vc5_job.c @@ -389,6 +389,7 @@ vc5_job_submit(struct vc5_context *vc5, struct vc5_job *job) v3d33_bcl_epilogue(vc5, job); } + job->submit.out_sync = vc5->out_sync; job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl); job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl); diff --git a/src/gallium/drivers/vc5/vc5_screen.h b/src/gallium/drivers/vc5/vc5_screen.h index 05a770ba2a1..9a7c11a63b8 100644 --- a/src/gallium/drivers/vc5/vc5_screen.h +++ b/src/gallium/drivers/vc5/vc5_screen.h @@ -98,7 +98,4 @@ struct pipe_screen *vc5_screen_create(int fd); void vc5_fence_init(struct vc5_screen *screen); -struct vc5_fence * -vc5_fence_create(struct vc5_screen *screen, uint64_t seqno); - #endif /* VC5_SCREEN_H */ diff --git a/src/gallium/drivers/vc5/vc5_simulator.c b/src/gallium/drivers/vc5/vc5_simulator.c index 93e8d44e30e..d677293f3ed 100644 --- a/src/gallium/drivers/vc5/vc5_simulator.c +++ b/src/gallium/drivers/vc5/vc5_simulator.c @@ -543,7 +543,6 @@ vc5_simulator_ioctl(int fd, unsigned long request, void *args) return vc5_simulator_mmap_bo_ioctl(fd, args); case DRM_IOCTL_VC5_WAIT_BO: - case DRM_IOCTL_VC5_WAIT_SEQNO: /* We do all of the vc5 rendering synchronously, so we just * return immediately on the wait ioctls. This ignores any * native rendering to the host BO, so it does mean we race on -- 2.30.2