winsys/amdgpu: add a fast exit path into amdgpu_cs_add_buffer
author Marek Olšák <marek.olsak@amd.com>
Thu, 26 Jan 2017 16:29:32 +0000 (17:29 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Mon, 30 Jan 2017 12:57:09 +0000 (13:57 +0100)
The time spent in the function dropped by 37% for torcs.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
src/gallium/winsys/amdgpu/drm/amdgpu_cs.h

index 2ce8a6e71cffc43748bac3e13ff74a26b686846b..9d5b0bde877f581b419be0a24c519d1fabd08db2 100644 (file)
@@ -444,6 +444,15 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
    struct amdgpu_cs_buffer *buffer;
    int index;
 
+   /* Fast exit for no-op calls.
+    * This is very effective with suballocators and linear uploaders that
+    * are outside of the winsys.
+    */
+   if (bo == cs->last_added_bo &&
+       (usage & cs->last_added_bo_usage) == usage &&
+       (1ull << priority) & cs->last_added_bo_priority_usage)
+      return cs->last_added_bo_index;
+
    if (!bo->bo) {
       index = amdgpu_lookup_or_add_slab_buffer(acs, bo);
       if (index < 0)
@@ -464,6 +473,11 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
    buffer->u.real.priority_usage |= 1llu << priority;
    buffer->usage |= usage;
    cs->flags[index] = MAX2(cs->flags[index], priority / 4);
+
+   cs->last_added_bo = bo;
+   cs->last_added_bo_index = index;
+   cs->last_added_bo_usage = buffer->usage;
+   cs->last_added_bo_priority_usage = buffer->u.real.priority_usage;
    return index;
 }
 
@@ -645,6 +659,7 @@ static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,
    for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) {
       cs->buffer_indices_hashlist[i] = -1;
    }
+   cs->last_added_bo = NULL;
 
    cs->request.number_of_ibs = 1;
    cs->request.ibs = &cs->ib[IB_MAIN];
@@ -676,6 +691,7 @@ static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
    for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) {
       cs->buffer_indices_hashlist[i] = -1;
    }
+   cs->last_added_bo = NULL;
 }
 
 static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs)
index 90b9e83990896720fc87fba9d38e0d51994d4d7a..495d55bc23dae47bd266e3b84e6fb60c77c41b8a 100644 (file)
@@ -94,6 +94,11 @@ struct amdgpu_cs_context {
 
    int                         buffer_indices_hashlist[4096];
 
+   struct amdgpu_winsys_bo     *last_added_bo;
+   unsigned                    last_added_bo_index;
+   unsigned                    last_added_bo_usage;
+   uint64_t                    last_added_bo_priority_usage;
+
    unsigned                    max_dependencies;
 
    struct pipe_fence_handle    *fence;