vc4: Optimize CL emits by doing size checks up front.
[mesa.git] / src / gallium / drivers / vc4 / vc4_context.c
index 87f025142ce9ab53d693e58025b67c458ffa4360..d4a9eec7b081b249f628150ba183ec755ffd1d0d 100644 (file)
@@ -104,6 +104,22 @@ vc4_setup_rcl(struct vc4_context *vc4)
                 resolve_uncleared);
 #endif
 
+        uint32_t reloc_size = 9;
+        uint32_t clear_size = 14;
+        uint32_t config_size = 11 + reloc_size;
+        uint32_t loadstore_size = 7 + reloc_size;
+        uint32_t tilecoords_size = 3;
+        uint32_t branch_size = 5 + reloc_size;
+        uint32_t color_store_size = 1;
+        cl_ensure_space(&vc4->rcl,
+                        clear_size +
+                        config_size +
+                        loadstore_size +
+                        xtiles * ytiles * (loadstore_size * 4 +
+                                           tilecoords_size * 3 +
+                                           branch_size +
+                                           color_store_size));
+
         cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
         cl_u32(&vc4->rcl, vc4->clear_color[0]);
         cl_u32(&vc4->rcl, vc4->clear_color[1]);
@@ -119,7 +135,6 @@ vc4_setup_rcl(struct vc4_context *vc4)
          */
         struct vc4_surface *render_surf = csurf ? csurf : zsurf;
         struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
-
         cl_start_reloc(&vc4->rcl, 1);
         cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
         cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
@@ -152,6 +167,10 @@ vc4_setup_rcl(struct vc4_context *vc4)
                 cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
         }
 
+        uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
+        uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
+        uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
+
         for (int y = 0; y < ytiles; y++) {
                 for (int x = 0; x < xtiles; x++) {
                         bool end_of_frame = (x == xtiles - 1 &&
@@ -175,8 +194,8 @@ vc4_setup_rcl(struct vc4_context *vc4)
                                       vc4_rt_format_is_565(csurf->base.format) ?
                                       VC4_LOADSTORE_TILE_BUFFER_BGR565 :
                                       VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
-                                cl_reloc(vc4, &vc4->rcl, ctex->bo,
-                                         csurf->offset);
+                                cl_reloc_hindex(&vc4->rcl, color_hindex,
+                                                csurf->offset);
 
                                 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
                         }
@@ -191,8 +210,8 @@ vc4_setup_rcl(struct vc4_context *vc4)
                                       (zsurf->tiling <<
                                        VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                 cl_u8(&vc4->rcl, 0);
-                                cl_reloc(vc4, &vc4->rcl, ztex->bo,
-                                         zsurf->offset);
+                                cl_reloc_hindex(&vc4->rcl, depth_hindex,
+                                                zsurf->offset);
 
                                 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
                         }
@@ -203,10 +222,16 @@ vc4_setup_rcl(struct vc4_context *vc4)
                          */
                         vc4_tile_coordinates(vc4, x, y, &coords_emitted);
 
+                        /* Wait for the binner before jumping to the first
+                         * tile's lists.
+                         */
+                        if (x == 0 && y == 0)
+                                cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
+
                         cl_start_reloc(&vc4->rcl, 1);
                         cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
-                        cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
-                                 (y * xtiles + x) * 32);
+                        cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
+                                        (y * xtiles + x) * 32);
 
                         if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                                 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
@@ -219,11 +244,11 @@ vc4_setup_rcl(struct vc4_context *vc4)
                                        VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
                                 cl_u8(&vc4->rcl,
                                       VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
-                                cl_reloc(vc4, &vc4->rcl, ztex->bo,
-                                         zsurf->offset |
-                                         ((end_of_frame &&
-                                           !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
-                                          VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
+                                cl_reloc_hindex(&vc4->rcl, depth_hindex,
+                                                zsurf->offset |
+                                                ((end_of_frame &&
+                                                  !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
+                                                 VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
 
                                 coords_emitted = false;
                         }
@@ -269,19 +294,21 @@ vc4_flush(struct pipe_context *pctx)
         if (!vc4->needs_flush)
                 return;
 
+        /* Increment the semaphore to signal that binning is done, which
+         * unblocks the render thread.  Note that the increment doesn't take
+         * effect until the FLUSH completes.
+         */
+        cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
         /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
         cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
 
-        cl_u8(&vc4->bcl, VC4_PACKET_NOP);
-        cl_u8(&vc4->bcl, VC4_PACKET_HALT);
-
         vc4_setup_rcl(vc4);
 
         if (vc4_debug & VC4_DEBUG_CL) {
                 fprintf(stderr, "BCL:\n");
-                vc4_dump_cl(vc4->bcl.base, vc4->bcl.end - vc4->bcl.base, false);
+                vc4_dump_cl(vc4->bcl.base, vc4->bcl.size, false);
                 fprintf(stderr, "RCL:\n");
-                vc4_dump_cl(vc4->rcl.base, vc4->rcl.end - vc4->rcl.base, true);
+                vc4_dump_cl(vc4->rcl.base, vc4->rcl.size, true);
         }
 
         struct drm_vc4_submit_cl submit;
@@ -314,6 +341,16 @@ vc4_flush(struct pipe_context *pctx)
                 }
         }
 
+        vc4->last_emit_seqno = submit.seqno;
+
+        if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
+                if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
+                                    PIPE_TIMEOUT_INFINITE)) {
+                        fprintf(stderr, "Wait failed.\n");
+                        abort();
+                }
+        }
+
         vc4_reset_cl(&vc4->bcl);
         vc4_reset_cl(&vc4->rcl);
         vc4_reset_cl(&vc4->shader_rec);
@@ -342,7 +379,15 @@ static void
 vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
                unsigned flags)
 {
+        struct vc4_context *vc4 = vc4_context(pctx);
+
         vc4_flush(pctx);
+
+        if (fence) {
+                struct vc4_fence *f = vc4_fence_create(vc4->screen,
+                                                       vc4->last_emit_seqno);
+                *fence = (struct pipe_fence_handle *)f;
+        }
 }
 
 /**
@@ -350,13 +395,13 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
  *
  * This helps avoid flushing the command buffers when unnecessary.
  */
-void
-vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo)
+bool
+vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
 
         if (!vc4->needs_flush)
-                return;
+                return false;
 
         /* Walk all the referenced BOs in the drawing command list to see if
          * they match.
@@ -365,8 +410,7 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo)
         for (int i = 0; i < (vc4->bo_handles.next -
                              vc4->bo_handles.base) / 4; i++) {
                 if (referenced_bos[i] == bo) {
-                        vc4_flush(pctx);
-                        return;
+                        return true;
                 }
         }
 
@@ -377,8 +421,7 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo)
         if (csurf) {
                 struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
                 if (ctex->bo == bo) {
-                        vc4_flush(pctx);
-                        return;
+                        return true;
                 }
         }
 
@@ -387,10 +430,11 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo)
                 struct vc4_resource *ztex =
                         vc4_resource(zsurf->base.texture);
                 if (ztex->bo == bo) {
-                        vc4_flush(pctx);
-                        return;
+                        return true;
                 }
         }
+
+        return false;
 }
 
 static void
@@ -406,6 +450,13 @@ vc4_context_destroy(struct pipe_context *pctx)
 
         util_slab_destroy(&vc4->transfer_pool);
 
+        pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
+        pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
+        vc4_bo_unreference(&vc4->tile_alloc);
+        vc4_bo_unreference(&vc4->tile_state);
+
+        vc4_program_fini(pctx);
+
         ralloc_free(vc4);
 }
 
@@ -440,7 +491,9 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv)
         vc4_init_cl(vc4, &vc4->bcl);
         vc4_init_cl(vc4, &vc4->rcl);
         vc4_init_cl(vc4, &vc4->shader_rec);
+        vc4_init_cl(vc4, &vc4->uniforms);
         vc4_init_cl(vc4, &vc4->bo_handles);
+        vc4_init_cl(vc4, &vc4->bo_pointers);
 
         vc4->dirty = ~0;
         vc4->fd = screen->fd;