util: use C99 declaration in the for-loop hash_table_foreach() macro
[mesa.git] / src / gallium / drivers / vc4 / vc4_context.c
index cc57486e1033ed9eba97f7a03eb6225390212edc..ffd7d4c8555fe72adefb4a457a3b5f2926e7675a 100644 (file)
@@ -29,6 +29,7 @@
 #include "util/u_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_blitter.h"
+#include "util/u_upload_mgr.h"
 #include "indices/u_primconvert.h"
 #include "pipe/p_screen.h"
 
 #include "vc4_context.h"
 #include "vc4_resource.h"
 
-/**
- * Emits a no-op STORE_TILE_BUFFER_GENERAL.
- *
- * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
- * some sort before another load is triggered.
- */
-static void
-vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted)
-{
-        if (!*coords_emitted)
-                return;
-
-        cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
-        cl_u8(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
-        cl_u8(&vc4->rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
-                          VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
-                          VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR));
-        cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
-
-        *coords_emitted = false;
-}
-
-/**
- * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
- *
- * The tile coordinates packet triggers a pending load if there is one, are
- * used for clipping during rendering, and determine where loads/stores happen
- * relative to their base address.
- */
-static void
-vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y,
-                       bool *coords_emitted)
-{
-        if (*coords_emitted)
-                return;
-
-        cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
-        cl_u8(&vc4->rcl, x);
-        cl_u8(&vc4->rcl, y);
-
-        *coords_emitted = true;
-}
-
-static void
-vc4_setup_rcl(struct vc4_context *vc4)
+void
+vc4_flush(struct pipe_context *pctx)
 {
-        struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
-        struct vc4_resource *ctex = csurf ? vc4_resource(csurf->base.texture) : NULL;
-        struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
-        struct vc4_resource *ztex = zsurf ? vc4_resource(zsurf->base.texture) : NULL;
-
-        if (!csurf)
-                vc4->resolve &= ~PIPE_CLEAR_COLOR0;
-        if (!zsurf)
-                vc4->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
-        uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
-        uint32_t width = vc4->framebuffer.width;
-        uint32_t height = vc4->framebuffer.height;
-        uint32_t xtiles = align(width, 64) / 64;
-        uint32_t ytiles = align(height, 64) / 64;
-
-#if 0
-        fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
-                vc4->resolve,
-                vc4->cleared,
-                resolve_uncleared);
-#endif
-
-        cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
-        cl_u32(&vc4->rcl, vc4->clear_color[0]);
-        cl_u32(&vc4->rcl, vc4->clear_color[1]);
-        cl_u32(&vc4->rcl, vc4->clear_depth);
-        cl_u8(&vc4->rcl, vc4->clear_stencil);
-
-        /* The rendering mode config determines the pointer that's used for
-         * VC4_PACKET_STORE_MS_TILE_BUFFER address computations.  The kernel
-         * could handle a no-relocation rendering mode config and deny those
-         * packets, but instead we just tell the kernel we're doing our color
-         * rendering to the Z buffer, and just don't emit any of those
-         * packets.
-         */
-        struct vc4_surface *render_surf = csurf ? csurf : zsurf;
-        struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
-
-        cl_start_reloc(&vc4->rcl, 1);
-        cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
-        cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
-        cl_u16(&vc4->rcl, width);
-        cl_u16(&vc4->rcl, height);
-        cl_u16(&vc4->rcl, ((render_surf->tiling <<
-                            VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
-                           (vc4_rt_format_is_565(render_surf->base.format) ?
-                            VC4_RENDER_CONFIG_FORMAT_BGR565 :
-                            VC4_RENDER_CONFIG_FORMAT_RGBA8888) |
-                           VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE));
-
-        /* The tile buffer normally gets cleared when the previous tile is
-         * stored.  If the clear values changed between frames, then the tile
-         * buffer has stale clear values in it, so we have to do a store in
-         * None mode (no writes) so that we trigger the tile buffer clear.
-         *
-         * Excess clearing is only a performance cost, since per-tile contents
-         * will be loaded/stored in the loop below.
-         */
-        if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
-                            PIPE_CLEAR_DEPTH |
-                            PIPE_CLEAR_STENCIL)) {
-                cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
-                cl_u8(&vc4->rcl, 0);
-                cl_u8(&vc4->rcl, 0);
-
-                cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
-                cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
-                cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
-        }
+        struct vc4_context *vc4 = vc4_context(pctx);
 
-        for (int y = 0; y < ytiles; y++) {
-                for (int x = 0; x < xtiles; x++) {
-                        bool end_of_frame = (x == xtiles - 1 &&
-                                             y == ytiles - 1);
-                        bool coords_emitted = false;
-
-                        /* Note that the load doesn't actually occur until the
-                         * tile coords packet is processed, and only one load
-                         * may be outstanding at a time.
-                         */
-                        if (resolve_uncleared & PIPE_CLEAR_COLOR) {
-                                vc4_store_before_load(vc4, &coords_emitted);
-
-                                cl_start_reloc(&vc4->rcl, 1);
-                                cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
-                                cl_u8(&vc4->rcl,
-                                      VC4_LOADSTORE_TILE_BUFFER_COLOR |
-                                      (csurf->tiling <<
-                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
-                                cl_u8(&vc4->rcl,
-                                      vc4_rt_format_is_565(csurf->base.format) ?
-                                      VC4_LOADSTORE_TILE_BUFFER_BGR565 :
-                                      VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
-                                cl_reloc(vc4, &vc4->rcl, ctex->bo,
-                                         csurf->offset);
-
-                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-                        }
-
-                        if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
-                                vc4_store_before_load(vc4, &coords_emitted);
-
-                                cl_start_reloc(&vc4->rcl, 1);
-                                cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
-                                cl_u8(&vc4->rcl,
-                                      VC4_LOADSTORE_TILE_BUFFER_ZS |
-                                      (zsurf->tiling <<
-                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
-                                cl_u8(&vc4->rcl, 0);
-                                cl_reloc(vc4, &vc4->rcl, ztex->bo,
-                                         zsurf->offset);
-
-                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-                        }
-
-                        /* Clipping depends on tile coordinates having been
-                         * emitted, so make sure it's happened even if
-                         * everything was cleared to start.
-                         */
-                        vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-
-                        cl_start_reloc(&vc4->rcl, 1);
-                        cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
-                        cl_reloc(vc4, &vc4->rcl, vc4->tile_alloc,
-                                 (y * xtiles + x) * 32);
-
-                        if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
-                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-
-                                cl_start_reloc(&vc4->rcl, 1);
-                                cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
-                                cl_u8(&vc4->rcl,
-                                      VC4_LOADSTORE_TILE_BUFFER_ZS |
-                                      (zsurf->tiling <<
-                                       VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
-                                cl_u8(&vc4->rcl,
-                                      VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
-                                cl_reloc(vc4, &vc4->rcl, ztex->bo,
-                                         zsurf->offset |
-                                         ((end_of_frame &&
-                                           !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
-                                          VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
-
-                                coords_emitted = false;
-                        }
-
-                        if (vc4->resolve & PIPE_CLEAR_COLOR0) {
-                                vc4_tile_coordinates(vc4, x, y, &coords_emitted);
-                                if (end_of_frame) {
-                                        cl_u8(&vc4->rcl,
-                                              VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
-                                } else {
-                                        cl_u8(&vc4->rcl,
-                                              VC4_PACKET_STORE_MS_TILE_BUFFER);
-                                }
-
-                                coords_emitted = false;
-                        }
-
-                        /* One of the bits needs to have been set that would
-                         * have triggered an EOF.
-                         */
-                        assert(vc4->resolve & (PIPE_CLEAR_COLOR0 |
-                                               PIPE_CLEAR_DEPTH |
-                                               PIPE_CLEAR_STENCIL));
-                        /* Any coords emitted must also have been consumed by
-                         * a store.
-                         */
-                        assert(!coords_emitted);
-                }
+        hash_table_foreach(vc4->jobs, entry) {
+                struct vc4_job *job = entry->data;
+                vc4_job_submit(vc4, job);
         }
 }
 
-void
-vc4_flush(struct pipe_context *pctx)
+static void
+vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
+               unsigned flags)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
 
-        if (!vc4->needs_flush)
-                return;
-
-        /* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
-        cl_u8(&vc4->bcl, VC4_PACKET_FLUSH);
-
-        cl_u8(&vc4->bcl, VC4_PACKET_NOP);
-        cl_u8(&vc4->bcl, VC4_PACKET_HALT);
-
-        vc4_setup_rcl(vc4);
+        vc4_flush(pctx);
 
-        if (vc4_debug & VC4_DEBUG_CL) {
-                fprintf(stderr, "BCL:\n");
-                vc4_dump_cl(vc4->bcl.base, vc4->bcl.end - vc4->bcl.base, false);
-                fprintf(stderr, "RCL:\n");
-                vc4_dump_cl(vc4->rcl.base, vc4->rcl.end - vc4->rcl.base, true);
-        }
+        if (fence) {
+                struct pipe_screen *screen = pctx->screen;
+                int fd = -1;
 
-        struct drm_vc4_submit_cl submit;
-        memset(&submit, 0, sizeof(submit));
-
-        submit.bo_handles = vc4->bo_handles.base;
-        submit.bo_handle_count = (vc4->bo_handles.next -
-                                  vc4->bo_handles.base) / 4;
-        submit.bin_cl = vc4->bcl.base;
-        submit.bin_cl_size = vc4->bcl.next - vc4->bcl.base;
-        submit.render_cl = vc4->rcl.base;
-        submit.render_cl_size = vc4->rcl.next - vc4->rcl.base;
-        submit.shader_rec = vc4->shader_rec.base;
-        submit.shader_rec_size = vc4->shader_rec.next - vc4->shader_rec.base;
-        submit.shader_rec_count = vc4->shader_rec_count;
-        submit.uniforms = vc4->uniforms.base;
-        submit.uniforms_size = vc4->uniforms.next - vc4->uniforms.base;
-
-        if (!(vc4_debug & VC4_DEBUG_NORAST)) {
-                int ret;
-
-#ifndef USE_VC4_SIMULATOR
-                ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
-#else
-                ret = vc4_simulator_flush(vc4, &submit);
-#endif
-                if (ret) {
-                        fprintf(stderr, "VC4 submit failed\n");
-                        abort();
+                if (flags & PIPE_FLUSH_FENCE_FD) {
+                        /* The vc4_fence takes ownership of the returned fd. */
+                        drmSyncobjExportSyncFile(vc4->fd, vc4->job_syncobj,
+                                                 &fd);
                 }
-        }
 
-        vc4_reset_cl(&vc4->bcl);
-        vc4_reset_cl(&vc4->rcl);
-        vc4_reset_cl(&vc4->shader_rec);
-        vc4_reset_cl(&vc4->uniforms);
-        vc4_reset_cl(&vc4->bo_handles);
-        struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
-        for (int i = 0; i < submit.bo_handle_count; i++)
-                vc4_bo_unreference(&referenced_bos[i]);
-        vc4_reset_cl(&vc4->bo_pointers);
-        vc4->shader_rec_count = 0;
-
-        vc4->needs_flush = false;
-        vc4->draw_call_queued = false;
-
-        /* We have no hardware context saved between our draw calls, so we
-         * need to flag the next draw as needing all state emitted.  Emitting
-         * all state at the start of our draws is also what ensures that we
-         * return to the state we need after a previous tile has finished.
-         */
-        vc4->dirty = ~0;
-        vc4->resolve = 0;
-        vc4->cleared = 0;
+                struct vc4_fence *f = vc4_fence_create(vc4->screen,
+                                                       vc4->last_emit_seqno,
+                                                       fd);
+                screen->fence_reference(screen, fence, NULL);
+                *fence = (struct pipe_fence_handle *)f;
+        }
 }
 
+/* We can't flush the texture cache while rendering a tile, so we have to
+ * flush all rendering to the kernel so that the next job reading from the
+ * tile gets a flushed cache.
+ */
 static void
-vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
-               unsigned flags)
+vc4_texture_barrier(struct pipe_context *pctx, unsigned flags)
 {
         vc4_flush(pctx);
 }
 
-/**
- * Flushes the current command lists if they reference the given BO.
- *
- * This helps avoid flushing the command buffers when unnecessary.
- */
-void
-vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo)
+static void
+vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
+        struct vc4_resource *rsc = vc4_resource(prsc);
 
-        if (!vc4->needs_flush)
-                return;
-
-        /* Walk all the referenced BOs in the drawing command list to see if
-         * they match.
-         */
-        struct vc4_bo **referenced_bos = vc4->bo_pointers.base;
-        for (int i = 0; i < (vc4->bo_handles.next -
-                             vc4->bo_handles.base) / 4; i++) {
-                if (referenced_bos[i] == bo) {
-                        vc4_flush(pctx);
-                        return;
-                }
-        }
+        rsc->initialized_buffers = 0;
 
-        /* Also check for the Z/color buffers, since the references to those
-         * are only added immediately before submit.
-         */
-        struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
-        if (csurf) {
-                struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
-                if (ctex->bo == bo) {
-                        vc4_flush(pctx);
-                        return;
-                }
-        }
+        struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
+                                                           prsc);
+        if (!entry)
+                return;
 
-        struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
-        if (zsurf) {
-                struct vc4_resource *ztex =
-                        vc4_resource(zsurf->base.texture);
-                if (ztex->bo == bo) {
-                        vc4_flush(pctx);
-                        return;
-                }
-        }
+        struct vc4_job *job = entry->data;
+        if (job->key.zsbuf && job->key.zsbuf->texture == prsc)
+                job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
 }
 
 static void
@@ -392,29 +107,54 @@ vc4_context_destroy(struct pipe_context *pctx)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
 
+        vc4_flush(pctx);
+
         if (vc4->blitter)
                 util_blitter_destroy(vc4->blitter);
 
         if (vc4->primconvert)
                 util_primconvert_destroy(vc4->primconvert);
 
-        util_slab_destroy(&vc4->transfer_pool);
+        if (vc4->uploader)
+                u_upload_destroy(vc4->uploader);
+
+        slab_destroy_child(&vc4->transfer_pool);
+
+        pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
+        pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
+
+        if (vc4->yuv_linear_blit_vs)
+                pctx->delete_vs_state(pctx, vc4->yuv_linear_blit_vs);
+        if (vc4->yuv_linear_blit_fs_8bit)
+                pctx->delete_fs_state(pctx, vc4->yuv_linear_blit_fs_8bit);
+        if (vc4->yuv_linear_blit_fs_16bit)
+                pctx->delete_fs_state(pctx, vc4->yuv_linear_blit_fs_16bit);
+
+        vc4_program_fini(pctx);
+
+        if (vc4->screen->has_syncobj) {
+                drmSyncobjDestroy(vc4->fd, vc4->job_syncobj);
+                drmSyncobjDestroy(vc4->fd, vc4->in_syncobj);
+        }
+        if (vc4->in_fence_fd >= 0)
+                close(vc4->in_fence_fd);
 
         ralloc_free(vc4);
 }
 
 struct pipe_context *
-vc4_context_create(struct pipe_screen *pscreen, void *priv)
+vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
 {
         struct vc4_screen *screen = vc4_screen(pscreen);
         struct vc4_context *vc4;
+        int err;
 
         /* Prevent dumping of the shaders built during context setup. */
         uint32_t saved_shaderdb_flag = vc4_debug & VC4_DEBUG_SHADERDB;
         vc4_debug &= ~VC4_DEBUG_SHADERDB;
 
         vc4 = rzalloc(NULL, struct vc4_context);
-        if (vc4 == NULL)
+        if (!vc4)
                 return NULL;
         struct pipe_context *pctx = &vc4->base;
 
@@ -424,6 +164,8 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv)
         pctx->priv = priv;
         pctx->destroy = vc4_context_destroy;
         pctx->flush = vc4_pipe_flush;
+        pctx->invalidate_resource = vc4_invalidate_resource;
+        pctx->texture_barrier = vc4_texture_barrier;
 
         vc4_draw_init(pctx);
         vc4_state_init(pctx);
@@ -431,17 +173,23 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv)
         vc4_query_init(pctx);
         vc4_resource_context_init(pctx);
 
-        vc4_init_cl(vc4, &vc4->bcl);
-        vc4_init_cl(vc4, &vc4->rcl);
-        vc4_init_cl(vc4, &vc4->shader_rec);
-        vc4_init_cl(vc4, &vc4->bo_handles);
-
-        vc4->dirty = ~0;
         vc4->fd = screen->fd;
 
-        util_slab_create(&vc4->transfer_pool, sizeof(struct vc4_transfer),
-                         16, UTIL_SLAB_SINGLETHREADED);
-        vc4->blitter = util_blitter_create(pctx);
+        err = vc4_job_init(vc4);
+        if (err)
+                goto fail;
+
+        err = vc4_fence_context_init(vc4);
+        if (err)
+                goto fail;
+
+        slab_create_child(&vc4->transfer_pool, &screen->transfer_pool);
+
+       vc4->uploader = u_upload_create_default(&vc4->base);
+       vc4->base.stream_uploader = vc4->uploader;
+       vc4->base.const_uploader = vc4->uploader;
+
+       vc4->blitter = util_blitter_create(pctx);
         if (!vc4->blitter)
                 goto fail;
 
@@ -452,6 +200,8 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv)
 
         vc4_debug |= saved_shaderdb_flag;
 
+        vc4->sample_mask = (1 << VC4_MAX_SAMPLES) - 1;
+
         return &vc4->base;
 
 fail: