From cc9762d74d33898f56cfcbd9ab386364a19df6dc Mon Sep 17 00:00:00 2001 From: Andres Rodriguez Date: Thu, 26 Oct 2017 22:42:08 -0400 Subject: [PATCH] winsys/amdgpu: add support for syncobj signaling v3 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add the ability to signal a syncobj when a cs completes execution. v2: corresponding changes for gallium fence->semaphore rename v3: s/semaphore/fence for pipe objects Signed-off-by: Andres Rodriguez Reviewed-by: Marek Olšák --- src/gallium/drivers/radeon/radeon_winsys.h | 12 ++++ src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 83 +++++++++++++++++++++- src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 4 ++ 3 files changed, 98 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h index 8a69a2b4982..238c921341d 100644 --- a/src/gallium/drivers/radeon/radeon_winsys.h +++ b/src/gallium/drivers/radeon/radeon_winsys.h @@ -582,6 +582,12 @@ struct radeon_winsys { void (*cs_add_fence_dependency)(struct radeon_winsys_cs *cs, struct pipe_fence_handle *fence); + /** + * Signal a syncobj when the CS finishes execution. + */ + void (*cs_add_syncobj_signal)(struct radeon_winsys_cs *cs, + struct pipe_fence_handle *fence); + /** * Wait for the fence and return true if the fence has been signalled. * The timeout of 0 will only return the status. @@ -598,6 +604,12 @@ struct radeon_winsys { void (*fence_reference)(struct pipe_fence_handle **dst, struct pipe_fence_handle *src); + /** + * Create a new fence object corresponding to the given syncobj fd. + */ + struct pipe_fence_handle *(*fence_import_syncobj)(struct radeon_winsys *ws, + int fd); + /** * Create a new fence object corresponding to the given sync_file. 
*/ diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 66ba23d2319..1927a3ad275 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -56,6 +56,31 @@ amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type, return (struct pipe_fence_handle *)fence; } +static struct pipe_fence_handle * +amdgpu_fence_import_syncobj(struct radeon_winsys *rws, int fd) +{ + struct amdgpu_winsys *ws = amdgpu_winsys(rws); + struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence); + int r; + + if (!fence) + return NULL; + + pipe_reference_init(&fence->reference, 1); + fence->ws = ws; + + r = amdgpu_cs_import_syncobj(ws->dev, fd, &fence->syncobj); + if (r) { + FREE(fence); + return NULL; + } + + util_queue_fence_init(&fence->submitted); + + assert(amdgpu_fence_is_syncobj(fence)); + return (struct pipe_fence_handle*)fence; +} + static struct pipe_fence_handle * amdgpu_fence_import_sync_file(struct radeon_winsys *rws, int fd) { @@ -838,11 +863,14 @@ static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs) } for (i = 0; i < cs->num_fence_dependencies; i++) amdgpu_fence_reference(&cs->fence_dependencies[i], NULL); + for (i = 0; i < cs->num_syncobj_to_signal; i++) + amdgpu_fence_reference(&cs->syncobj_to_signal[i], NULL); cs->num_real_buffers = 0; cs->num_slab_buffers = 0; cs->num_sparse_buffers = 0; cs->num_fence_dependencies = 0; + cs->num_syncobj_to_signal = 0; amdgpu_fence_reference(&cs->fence, NULL); memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist)); @@ -858,6 +886,7 @@ static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs) FREE(cs->slab_buffers); FREE(cs->sparse_buffers); FREE(cs->fence_dependencies); + FREE(cs->syncobj_to_signal); } @@ -1167,6 +1196,36 @@ static void amdgpu_add_fence_dependencies_bo_lists(struct amdgpu_cs *acs) amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_sparse_buffers, cs->sparse_buffers); } 
+static unsigned add_syncobj_to_signal_entry(struct amdgpu_cs_context *cs) +{ + unsigned idx = cs->num_syncobj_to_signal++; + + if (idx >= cs->max_syncobj_to_signal) { + unsigned size; + const unsigned increment = 8; + + cs->max_syncobj_to_signal = idx + increment; + size = cs->max_syncobj_to_signal * sizeof(cs->syncobj_to_signal[0]); + cs->syncobj_to_signal = realloc(cs->syncobj_to_signal, size); + /* Clear the newly-allocated elements. */ + memset(cs->syncobj_to_signal + idx, 0, + increment * sizeof(cs->syncobj_to_signal[0])); + } + return idx; +} + +static void amdgpu_cs_add_syncobj_signal(struct radeon_winsys_cs *rws, + struct pipe_fence_handle *fence) +{ + struct amdgpu_cs *acs = amdgpu_cs(rws); + struct amdgpu_cs_context *cs = acs->csc; + + assert(amdgpu_fence_is_syncobj((struct amdgpu_fence *)fence)); + + unsigned idx = add_syncobj_to_signal_entry(cs); + amdgpu_fence_reference(&cs->syncobj_to_signal[idx], fence); +} + /* Add backing of sparse buffers to the buffer list. * * This is done late, during submission, to keep the buffer list short before @@ -1297,7 +1356,7 @@ bo_list_error: if (acs->ctx->num_rejected_cs) { r = -ECANCELED; } else { - struct drm_amdgpu_cs_chunk chunks[4]; + struct drm_amdgpu_cs_chunk chunks[5]; unsigned num_chunks = 0; /* Convert from dwords to bytes. */ @@ -1368,6 +1427,26 @@ bo_list_error: num_chunks++; } + /* Syncobj signals. 
*/ + if (cs->num_syncobj_to_signal) { + struct drm_amdgpu_cs_chunk_sem *sem_chunk = + alloca(cs->num_syncobj_to_signal * sizeof(sem_chunk[0])); + + for (unsigned i = 0; i < cs->num_syncobj_to_signal; i++) { + struct amdgpu_fence *fence = + (struct amdgpu_fence*)cs->syncobj_to_signal[i]; + + assert(amdgpu_fence_is_syncobj(fence)); + sem_chunk[i].handle = fence->syncobj; + } + + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_SYNCOBJ_OUT; + chunks[num_chunks].length_dw = sizeof(sem_chunk[0]) / 4 + * cs->num_syncobj_to_signal; + chunks[num_chunks].chunk_data = (uintptr_t)sem_chunk; + num_chunks++; + } + assert(num_chunks <= ARRAY_SIZE(chunks)); r = amdgpu_cs_submit_raw(ws->dev, acs->ctx->ctx, bo_list, @@ -1576,8 +1655,10 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws) ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced; ws->base.cs_sync_flush = amdgpu_cs_sync_flush; ws->base.cs_add_fence_dependency = amdgpu_cs_add_fence_dependency; + ws->base.cs_add_syncobj_signal = amdgpu_cs_add_syncobj_signal; ws->base.fence_wait = amdgpu_fence_wait_rel_timeout; ws->base.fence_reference = amdgpu_fence_reference; + ws->base.fence_import_syncobj = amdgpu_fence_import_syncobj; ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file; ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file; ws->base.export_signalled_sync_file = amdgpu_export_signalled_sync_file; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index fbf44b36610..80acb7cb8c2 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -104,6 +104,10 @@ struct amdgpu_cs_context { unsigned num_fence_dependencies; unsigned max_fence_dependencies; + struct pipe_fence_handle **syncobj_to_signal; + unsigned num_syncobj_to_signal; + unsigned max_syncobj_to_signal; + struct pipe_fence_handle *fence; /* the error returned from cs_flush for non-async submissions */ -- 2.30.2