vc4: Avoid repeated hindex lookups in the loop over tiles.
author: Eric Anholt <eric@anholt.net>
Sun, 21 Dec 2014 21:10:25 +0000 (13:10 -0800)
committer: Eric Anholt <eric@anholt.net>
Wed, 24 Dec 2014 18:28:33 +0000 (08:28 -1000)
Improves norast performance of a microbenchmark by 11.1865% +/- 2.37673%
(n=20).

src/gallium/drivers/vc4/vc4_cl.h
src/gallium/drivers/vc4/vc4_context.c

index 634a4b0a4217a36357a2dbd9f13e8b96273175ba..86cd0c797a6208e6387251f5d3b1fe26d56b9862 100644 (file)
@@ -117,10 +117,9 @@ cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
 }
 
 static inline void
-cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
-         struct vc4_bo *bo, uint32_t offset)
+cl_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
 {
-        *(uint32_t *)(cl->base + cl->reloc_next) = vc4_gem_hindex(vc4, bo);
+        *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
         cl->reloc_next += 4;
 
         cl->reloc_count--;
@@ -128,4 +127,11 @@ cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
         cl_u32(cl, offset);
 }
 
+static inline void
+cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
+         struct vc4_bo *bo, uint32_t offset)
+{
+        cl_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
+}
+
 #endif /* VC4_CL_H */
index e49d6549929dcdfe8d027790bfe864e806d3bd10..906af05b44b532ae7d4885a824269f6db01be562 100644 (file)
@@ -119,7 +119,6 @@ vc4_setup_rcl(struct vc4_context *vc4)
          */
         struct vc4_surface *render_surf = csurf ? csurf : zsurf;
         struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
-
         cl_start_reloc(&vc4->rcl, 1);
         cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
         cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
@@ -152,6 +151,10 @@ vc4_setup_rcl(struct vc4_context *vc4)
                 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
         }
 
+        uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
+        uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
+        uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
+
         for (int y = 0; y < ytiles; y++) {
                 for (int x = 0; x < xtiles; x++) {
                         bool end_of_frame = (x == xtiles - 1 &&
@@ -175,8 +178,8 @@ vc4_setup_rcl(struct vc4_context *vc4)
                                       vc4_rt_format_is_565(csurf->base.format) ?
                                       VC4_LOADSTORE_TILE_BUFFER_BGR565 :
                                       VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
-                                cl_reloc(vc4, &vc4->rcl, ctex->bo,
-                                         csurf->offset);
+                                cl_reloc_hindex(&vc4->rcl, color_hindex,
+                                                csurf->offset);
 
                                 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
                         }
@@ -191,8 +194,8 @@ vc4_setup_rcl(struct vc4_context *vc4)
                                       (zsurf->tiling <<
                                        VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                 cl_u8(&vc4->rcl, 0);
-                                cl_reloc(vc4, &vc4->rcl, ztex->bo,
-                                         zsurf->offset);
+                                cl_reloc_hindex(&vc4->rcl, depth_hindex,
+                                                zsurf->offset);
 
                                 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
                         }
@@ -211,8 +214,8 @@ vc4_setup_rcl(struct vc4_context *vc4)
 
                         cl_start_reloc(&vc4->rcl, 1);
                         cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
-                        cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
-                                 (y * xtiles + x) * 32);
+                        cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
+                                        (y * xtiles + x) * 32);
 
                         if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                                 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
@@ -225,11 +228,11 @@ vc4_setup_rcl(struct vc4_context *vc4)
                                        VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                 cl_u8(&vc4->rcl,
                                       VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
-                                cl_reloc(vc4, &vc4->rcl, ztex->bo,
-                                         zsurf->offset |
-                                         ((end_of_frame &&
-                                           !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
-                                          VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
+                                cl_reloc_hindex(&vc4->rcl, depth_hindex,
+                                                zsurf->offset |
+                                                ((end_of_frame &&
+                                                  !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
+                                                 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
 
                                 coords_emitted = false;
                         }