From 20e3a2430e0435b8ee4861553e5acd13c58cf90f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 21 Dec 2014 13:10:25 -0800 Subject: [PATCH] vc4: Avoid repeated hindex lookups in the loop over tiles. Improves norast performance of a microbenchmark by 11.1865% +/- 2.37673% (n=20). --- src/gallium/drivers/vc4/vc4_cl.h | 12 +++++++++--- src/gallium/drivers/vc4/vc4_context.c | 27 +++++++++++++++------------ 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h index 634a4b0a421..86cd0c797a6 100644 --- a/src/gallium/drivers/vc4/vc4_cl.h +++ b/src/gallium/drivers/vc4/vc4_cl.h @@ -117,10 +117,9 @@ cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n) } static inline void -cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, - struct vc4_bo *bo, uint32_t offset) +cl_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset) { - *(uint32_t *)(cl->base + cl->reloc_next) = vc4_gem_hindex(vc4, bo); + *(uint32_t *)(cl->base + cl->reloc_next) = hindex; cl->reloc_next += 4; cl->reloc_count--; @@ -128,4 +127,11 @@ cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, cl_u32(cl, offset); } +static inline void +cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl, + struct vc4_bo *bo, uint32_t offset) +{ + cl_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset); +} + #endif /* VC4_CL_H */ diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index e49d6549929..906af05b44b 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -119,7 +119,6 @@ vc4_setup_rcl(struct vc4_context *vc4) */ struct vc4_surface *render_surf = csurf ? csurf : zsurf; struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture); - cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG); cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset); @@ -152,6 +151,10 @@ vc4_setup_rcl(struct vc4_context *vc4) cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */ } + uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0; + uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0; + uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc); + for (int y = 0; y < ytiles; y++) { for (int x = 0; x < xtiles; x++) { bool end_of_frame = (x == xtiles - 1 && @@ -175,8 +178,8 @@ vc4_setup_rcl(struct vc4_context *vc4) vc4_rt_format_is_565(csurf->base.format) ? VC4_LOADSTORE_TILE_BUFFER_BGR565 : VC4_LOADSTORE_TILE_BUFFER_RGBA8888); - cl_reloc(vc4, &vc4->rcl, ctex->bo, - csurf->offset); + cl_reloc_hindex(&vc4->rcl, color_hindex, + csurf->offset); vc4_tile_coordinates(vc4, x, y, &coords_emitted); } @@ -191,8 +194,8 @@ vc4_setup_rcl(struct vc4_context *vc4) (zsurf->tiling << VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); cl_u8(&vc4->rcl, 0); - cl_reloc(vc4, &vc4->rcl, ztex->bo, - zsurf->offset); + cl_reloc_hindex(&vc4->rcl, depth_hindex, + zsurf->offset); vc4_tile_coordinates(vc4, x, y, &coords_emitted); } @@ -211,8 +214,8 @@ vc4_setup_rcl(struct vc4_context *vc4) cl_start_reloc(&vc4->rcl, 1); cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST); - cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc, - (y * xtiles + x) * 32); + cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex, + (y * xtiles + x) * 32); if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { vc4_tile_coordinates(vc4, x, y, &coords_emitted); @@ -225,11 +228,11 @@ vc4_setup_rcl(struct vc4_context *vc4) VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT)); cl_u8(&vc4->rcl, VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR); - cl_reloc(vc4, &vc4->rcl, ztex->bo, - zsurf->offset | - ((end_of_frame && - !(vc4->resolve & PIPE_CLEAR_COLOR0)) ? - VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); + cl_reloc_hindex(&vc4->rcl, depth_hindex, + zsurf->offset | + ((end_of_frame && + !(vc4->resolve & PIPE_CLEAR_COLOR0)) ? + VC4_LOADSTORE_TILE_BUFFER_EOF : 0)); coords_emitted = false; } -- 2.30.2