From 0ed952c7e9b811bc11dec64bd4bebcdf4222cc85 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 28 Nov 2017 16:17:16 -0800 Subject: [PATCH] broadcom/vc4: Use a single-entry cached last_hindex value. Since almost all BOs will be in one CL at a time, this cache will almost always hit except for the first usage of the BO in each CL. This didn't show up as statistically significant on the minetest trace (n=340), but if I lop off the throttled lobe of the bimodal distribution, it very clearly does (0.74731% +/- 0.162093%, n=269). --- src/gallium/drivers/vc4/vc4_bufmgr.h | 8 ++++++++ src/gallium/drivers/vc4/vc4_cl.c | 14 ++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h index 4e7b23e0862..e0f6bbcfd8b 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.h +++ b/src/gallium/drivers/vc4/vc4_bufmgr.h @@ -39,6 +39,14 @@ struct vc4_bo { uint32_t handle; uint32_t size; + /* This will be read/written by multiple threads without a lock -- you + * should take a snapshot and use it to see if you happen to be in the + * CL's handles at this position, to make most lookups O(1). It's + * volatile to make sure that the compiler doesn't emit multiple loads + * from the address, which would make the lookup racy. + */ + volatile uint32_t last_hindex; + /** Entry in the linked list of buffers freed, by age. */ struct list_head time_list; /** Entry in the per-page-count linked list of buffers freed (by age). */ diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c index 508281a27bb..7ae092ebce3 100644 --- a/src/gallium/drivers/vc4/vc4_cl.c +++ b/src/gallium/drivers/vc4/vc4_cl.c @@ -61,10 +61,19 @@ vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo) { uint32_t hindex; uint32_t *current_handles = job->bo_handles.base; + uint32_t cl_hindex_count = cl_offset(&job->bo_handles) / 4; + uint32_t last_hindex = bo->last_hindex; /* volatile read! */ - for (hindex = 0; hindex < cl_offset(&job->bo_handles) / 4; hindex++) { - if (current_handles[hindex] == bo->handle) + if (last_hindex < cl_hindex_count && + current_handles[last_hindex] == bo->handle) { + return last_hindex; + } + + for (hindex = 0; hindex < cl_hindex_count; hindex++) { + if (current_handles[hindex] == bo->handle) { + bo->last_hindex = hindex; return hindex; + } } struct vc4_cl_out *out; @@ -79,5 +88,6 @@ vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo) job->bo_space += bo->size; + bo->last_hindex = hindex; return hindex; } -- 2.30.2