vc4: Just stream out fallback IB contents.
[mesa.git] / src / gallium / drivers / vc4 / vc4_context.c
index 906af05b44b532ae7d4885a824269f6db01be562..a2b1cac952d9a9accf4bf11d820cab1da5036f1f 100644 (file)
@@ -29,6 +29,7 @@
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_blitter.h"
+#include "util/u_upload_mgr.h"
 #include "indices/u_primconvert.h"
 #include "pipe/p_screen.h"
 
@@ -94,8 +95,15 @@ vc4_setup_rcl(struct vc4_context *vc4)
         uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
         uint32_t width = vc4->framebuffer.width;
         uint32_t height = vc4->framebuffer.height;
-        uint32_t xtiles = align(width, 64) / 64;
-        uint32_t ytiles = align(height, 64) / 64;
+        uint32_t stride_in_tiles = align(width, 64) / 64;
+
+        assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
+        uint32_t min_x_tile = vc4->draw_min_x / 64;
+        uint32_t min_y_tile = vc4->draw_min_y / 64;
+        uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64;
+        uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64;
+        uint32_t xtiles = max_x_tile - min_x_tile + 1;
+        uint32_t ytiles = max_y_tile - min_y_tile + 1;
 
 #if 0
         fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
@@ -104,11 +112,31 @@ vc4_setup_rcl(struct vc4_context *vc4)
                 resolve_uncleared);
 #endif
 
-        cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
-        cl_u32(&vc4->rcl, vc4->clear_color[0]);
-        cl_u32(&vc4->rcl, vc4->clear_color[1]);
-        cl_u32(&vc4->rcl, vc4->clear_depth);
-        cl_u8(&vc4->rcl, vc4->clear_stencil);
+        uint32_t reloc_size = 9;
+        uint32_t clear_size = 14;
+        uint32_t config_size = 11 + reloc_size;
+        uint32_t loadstore_size = 7 + reloc_size;
+        uint32_t tilecoords_size = 3;
+        uint32_t branch_size = 5 + reloc_size;
+        uint32_t color_store_size = 1;
+        uint32_t semaphore_size = 1;
+        cl_ensure_space(&vc4->rcl,
+                        clear_size +
+                        config_size +
+                        loadstore_size +
+                        semaphore_size +
+                        xtiles * ytiles * (loadstore_size * 4 +
+                                           tilecoords_size * 3 +
+                                           branch_size +
+                                           color_store_size));
+
+        if (vc4->cleared) {
+                cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
+                cl_u32(&vc4->rcl, vc4->clear_color[0]);
+                cl_u32(&vc4->rcl, vc4->clear_color[1]);
+                cl_u32(&vc4->rcl, vc4->clear_depth);
+                cl_u8(&vc4->rcl, vc4->clear_stencil);
+        }
 
         /* The rendering mode config determines the pointer that's used for
          * VC4_PACKET_STORE_MS_TILE_BUFFER address computations.  The kernel
@@ -128,8 +156,7 @@ vc4_setup_rcl(struct vc4_context *vc4)
                             VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
                            (vc4_rt_format_is_565(render_surf->base.format) ?
                             VC4_RENDER_CONFIG_FORMAT_BGR565 :
-                            VC4_RENDER_CONFIG_FORMAT_RGBA8888) |
-                           VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE));
+                            VC4_RENDER_CONFIG_FORMAT_RGBA8888)));
 
         /* The tile buffer normally gets cleared when the previous tile is
          * stored.  If the clear values changed between frames, then the tile
@@ -155,10 +182,10 @@ vc4_setup_rcl(struct vc4_context *vc4)
         uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
         uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
 
-        for (int y = 0; y < ytiles; y++) {
-                for (int x = 0; x < xtiles; x++) {
-                        bool end_of_frame = (x == xtiles - 1 &&
-                                             y == ytiles - 1);
+        for (int y = min_y_tile; y <= max_y_tile; y++) {
+                for (int x = min_x_tile; x <= max_x_tile; x++) {
+                        bool end_of_frame = (x == max_x_tile &&
+                                             y == max_y_tile);
                         bool coords_emitted = false;
 
                         /* Note that the load doesn't actually occur until the
@@ -209,13 +236,13 @@ vc4_setup_rcl(struct vc4_context *vc4)
                         /* Wait for the binner before jumping to the first
                          * tile's lists.
                          */
-                        if (x == 0 && y == 0)
+                        if (x == min_x_tile && y == min_y_tile)
                                 cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
 
                         cl_start_reloc(&vc4->rcl, 1);
                         cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
                         cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
-                                        (y * xtiles + x) * 32);
+                                        (y * stride_in_tiles + x) * 32);
 
                         if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                                 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
@@ -278,85 +305,27 @@ vc4_flush(struct pipe_context *pctx)
         if (!vc4->needs_flush)
                 return;
 
+        /* The RCL setup would choke if the draw bounds cause no drawing, so
+         * just drop the drawing if that's the case.
+         */
+        if (vc4->draw_max_x <= vc4->draw_min_x ||
+            vc4->draw_max_y <= vc4->draw_min_y) {
+                vc4_job_reset(vc4);
+                return;
+        }
+
         /* Increment the semaphore indicating that binning is done and
          * unblocking the render thread.  Note that this doesn't act until the
          * FLUSH completes.
          */
+        cl_ensure_space(&vc4->bcl, 8);
         cl_u8(&vc4->bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
         /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
         cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
 
         vc4_setup_rcl(vc4);
 
-        if (vc4_debug & VC4_DEBUG_CL) {
-                fprintf(stderr, "BCL:\n");
-                vc4_dump_cl(vc4->bcl.base, vc4->bcl.end - vc4->bcl.base, false);
-                fprintf(stderr, "RCL:\n");
-                vc4_dump_cl(vc4->rcl.base, vc4->rcl.end - vc4->rcl.base, true);
-        }
-
-        struct drm_vc4_submit_cl submit;
-        memset(&submit, 0, sizeof(submit));
-
-        submit.bo_handles = vc4->bo_handles.base;
-        submit.bo_handle_count = (vc4->bo_handles.next -
-                                  vc4->bo_handles.base) / 4;
-        submit.bin_cl = vc4->bcl.base;
-        submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
-        submit.render_cl = vc4->rcl.base;
-        submit.render_cl_size = vc4->rcl.next - vc4->rcl.base;
-        submit.shader_rec = vc4->shader_rec.base;
-        submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
-        submit.shader_rec_count = vc4->shader_rec_count;
-        submit.uniforms = vc4->uniforms.base;
-        submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
-
-        if (!(vc4_debug & VC4_DEBUG_NORAST)) {
-                int ret;
-
-#ifndef USE_VC4_SIMULATOR
-                ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
-#else
-                ret = vc4_simulator_flush(vc4, &submit);
-#endif
-                if (ret) {
-                        fprintf(stderr, "VC4 submit failed\n");
-                        abort();
-                }
-        }
-
-        vc4->last_emit_seqno = submit.seqno;
-
-        if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
-                if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
-                                    PIPE_TIMEOUT_INFINITE)) {
-                        fprintf(stderr, "Wait failed.\n");
-                        abort();
-                }
-        }
-
-        vc4_reset_cl(&vc4->bcl);
-        vc4_reset_cl(&vc4->rcl);
-        vc4_reset_cl(&vc4->shader_rec);
-        vc4_reset_cl(&vc4->uniforms);
-        vc4_reset_cl(&vc4->bo_handles);
-        struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
-        for (int i = 0; i < submit.bo_handle_count; i++)
-                vc4_bo_unreference(&referenced_bos[i]);
-        vc4_reset_cl(&vc4->bo_pointers);
-        vc4->shader_rec_count = 0;
-
-        vc4->needs_flush = false;
-        vc4->draw_call_queued = false;
-
-        /* We have no hardware context saved between our draw calls, so we
-         * need to flag the next draw as needing all state emitted.  Emitting
-         * all state at the start of our draws is also what ensures that we
-         * return to the state we need after a previous tile has finished.
-         */
-        vc4->dirty = ~0;
-        vc4->resolve = 0;
-        vc4->cleared = 0;
+        vc4_job_submit(vc4);
 }
 
 static void
@@ -421,6 +390,16 @@ vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
         return false;
 }
 
+static void
+vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
+{
+        struct vc4_context *vc4 = vc4_context(pctx);
+        struct pipe_surface *zsurf = vc4->framebuffer.zsbuf;
+        /* Installed as pctx->invalidate_resource.  When the invalidated
+         * resource is the texture behind the currently bound depth/stencil
+         * surface, clear the depth and stencil bits from vc4->resolve.
+         * Any other resource leaves the context state untouched.
+         * NOTE(review): presumably this lets the next flush skip storing
+         * Z/S contents nobody will read -- confirm in vc4_setup_rcl().
+         */
+        if (zsurf && zsurf->texture == prsc)
+                vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
+}
+
 static void
 vc4_context_destroy(struct pipe_context *pctx)
 {
@@ -432,6 +411,9 @@ vc4_context_destroy(struct pipe_context *pctx)
         if (vc4->primconvert)
                 util_primconvert_destroy(vc4->primconvert);
 
+        if (vc4->uploader)
+                u_upload_destroy(vc4->uploader);
+
         util_slab_destroy(&vc4->transfer_pool);
 
         pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
@@ -465,6 +447,7 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv)
         pctx->priv = priv;
         pctx->destroy = vc4_context_destroy;
         pctx->flush = vc4_pipe_flush;
+        pctx->invalidate_resource = vc4_invalidate_resource;
 
         vc4_draw_init(pctx);
         vc4_state_init(pctx);
@@ -472,14 +455,8 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv)
         vc4_query_init(pctx);
         vc4_resource_context_init(pctx);
 
-        vc4_init_cl(vc4, &vc4->bcl);
-        vc4_init_cl(vc4, &vc4->rcl);
-        vc4_init_cl(vc4, &vc4->shader_rec);
-        vc4_init_cl(vc4, &vc4->uniforms);
-        vc4_init_cl(vc4, &vc4->bo_handles);
-        vc4_init_cl(vc4, &vc4->bo_pointers);
+        vc4_job_init(vc4);
 
-        vc4->dirty = ~0;
         vc4->fd = screen->fd;
 
         util_slab_create(&vc4->transfer_pool, sizeof(struct vc4_transfer),
@@ -493,6 +470,9 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv)
         if (!vc4->primconvert)
                 goto fail;
 
+        vc4->uploader = u_upload_create(pctx, 16 * 1024, 4,
+                                        PIPE_BIND_INDEX_BUFFER);
+
         vc4_debug |= saved_shaderdb_flag;
 
         return &vc4->base;