From 2fc5fe0e85d7dce0d995b71499a95ac2162feb12 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 26 Jan 2017 17:29:32 +0100 Subject: [PATCH] winsys/amdgpu: add a fast exit path into amdgpu_cs_add_buffer MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The time spent in the function dropped by 37% for torcs. Reviewed-by: Nicolai Hähnle --- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 16 ++++++++++++++++ src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 5 +++++ 2 files changed, 21 insertions(+) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 2ce8a6e71cf..9d5b0bde877 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -444,6 +444,15 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs, struct amdgpu_cs_buffer *buffer; int index; + /* Fast exit for no-op calls. + * This is very effective with suballocators and linear uploaders that + * are outside of the winsys. + */ + if (bo == cs->last_added_bo && + (usage & cs->last_added_bo_usage) == usage && + (1ull << priority) & cs->last_added_bo_priority_usage) + return cs->last_added_bo_index; + if (!bo->bo) { index = amdgpu_lookup_or_add_slab_buffer(acs, bo); if (index < 0) @@ -464,6 +473,11 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs, buffer->u.real.priority_usage |= 1llu << priority; buffer->usage |= usage; cs->flags[index] = MAX2(cs->flags[index], priority / 4); + + cs->last_added_bo = bo; + cs->last_added_bo_index = index; + cs->last_added_bo_usage = buffer->usage; + cs->last_added_bo_priority_usage = buffer->u.real.priority_usage; return index; } @@ -645,6 +659,7 @@ static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs, for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) { cs->buffer_indices_hashlist[i] = -1; } + cs->last_added_bo = NULL; cs->request.number_of_ibs = 1; cs->request.ibs = &cs->ib[IB_MAIN]; @@ -676,6 +691,7 @@ static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs) for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) { cs->buffer_indices_hashlist[i] = -1; } + cs->last_added_bo = NULL; } static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs) diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h index 90b9e839908..495d55bc23d 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h @@ -94,6 +94,11 @@ struct amdgpu_cs_context { int buffer_indices_hashlist[4096]; + struct amdgpu_winsys_bo *last_added_bo; + unsigned last_added_bo_index; + unsigned last_added_bo_usage; + uint64_t last_added_bo_priority_usage; + unsigned max_dependencies; struct pipe_fence_handle *fence; -- 2.30.2