vc4: Implement job shuffling
author Eric Anholt <eric@anholt.net>
Thu, 8 Sep 2016 19:56:11 +0000 (12:56 -0700)
committer Eric Anholt <eric@anholt.net>
Wed, 14 Sep 2016 05:25:41 +0000 (06:25 +0100)
Track rendering to each FBO independently and flush rendering only when
necessary.  This lets us avoid the overhead of storing and loading the
frame when an application momentarily switches to rendering to some other
texture in order to continue rendering the main scene.

Improves glmark -b desktop:effect=shadow:windows=4 by 27%
Improves glmark -b
    desktop:blur-radius=5:effect=blur:passes=1:separable=true:windows=4
    by 17%

While I haven't tested other apps, this should help X rendering a lot, and
I've heard GLBenchmark needed it too.

src/gallium/drivers/vc4/vc4_blit.c
src/gallium/drivers/vc4/vc4_context.c
src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_draw.c
src/gallium/drivers/vc4/vc4_job.c
src/gallium/drivers/vc4/vc4_resource.c
src/gallium/drivers/vc4/vc4_simulator.c
src/gallium/drivers/vc4/vc4_state.c

index d3fc8e922ad49ce9d597986f963ad62fcfa60af8..1e056568acb6bc2edd6ad4841447b2fb20a4499c 100644 (file)
@@ -51,10 +51,6 @@ static bool
 vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
-        bool old_msaa = job->msaa;
-        int old_tile_width = job->tile_width;
-        int old_tile_height = job->tile_height;
         bool msaa = (info->src.resource->nr_samples > 1 ||
                      info->dst.resource->nr_samples > 1);
         int tile_width = msaa ? 32 : 64;
@@ -115,8 +111,6 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
         if (info->dst.resource->format != info->src.resource->format)
                 return false;
 
-        vc4_flush(pctx);
-
         if (false) {
                 fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
                         info->src.box.x,
@@ -132,11 +126,19 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
         struct pipe_surface *src_surf =
                 vc4_get_blit_surface(pctx, info->src.resource, info->src.level);
 
+        vc4_flush_jobs_reading_resource(vc4, info->src.resource);
+
+        struct vc4_job *job = vc4_get_job(vc4, dst_surf, NULL);
         pipe_surface_reference(&job->color_read, src_surf);
-        if (dst_surf->texture->nr_samples > 1)
-                pipe_surface_reference(&job->color_write, dst_surf);
-        else
-                pipe_surface_reference(&job->msaa_color_write, dst_surf);
+
+        /* If we're resolving from MSAA to single sample, we still need to run
+         * the engine in MSAA mode for the load.
+         */
+        if (!job->msaa && info->src.resource->nr_samples > 1) {
+                job->msaa = true;
+                job->tile_width = 32;
+                job->tile_height = 32;
+        }
 
         job->draw_min_x = info->dst.box.x;
         job->draw_min_y = info->dst.box.y;
@@ -153,10 +155,6 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
 
         vc4_job_submit(vc4, job);
 
-        job->msaa = old_msaa;
-        job->tile_width = old_tile_width;
-        job->tile_height = old_tile_height;
-
         pipe_surface_reference(&dst_surf, NULL);
         pipe_surface_reference(&src_surf, NULL);
 
index 37b002d5cbad71d11f34f01ee5a42a3c3bcf970a..3863e4432a9837cb09ff6f3ebdd407bbe8da5172 100644 (file)
@@ -41,38 +41,12 @@ void
 vc4_flush(struct pipe_context *pctx)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
-        struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
-        struct vc4_job *job = vc4->job;
-
-        if (cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) {
-                if (cbuf->texture->nr_samples > 1) {
-                        pipe_surface_reference(&job->msaa_color_write, cbuf);
-                } else {
-                        pipe_surface_reference(&job->color_write, cbuf);
-                }
-
-                pipe_surface_reference(&job->color_read, cbuf);
-        }
-
-        if (zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
-                if (zsbuf->texture->nr_samples > 1) {
-                        pipe_surface_reference(&job->msaa_zs_write, zsbuf);
-                } else {
-                        pipe_surface_reference(&job->zs_write, zsbuf);
-                }
 
-                pipe_surface_reference(&job->zs_read, zsbuf);
+        struct hash_entry *entry;
+        hash_table_foreach(vc4->jobs, entry) {
+                struct vc4_job *job = entry->data;
+                vc4_job_submit(vc4, job);
         }
-
-        vc4_job_submit(vc4, job);
-
-        /* We have no hardware context saved between our draw calls, so we
-         * need to flag the next draw as needing all state emitted.  Emitting
-         * all state at the start of our draws is also what ensures that we
-         * return to the state we need after a previous tile has finished.
-         */
-        vc4->dirty = ~0;
 }
 
 static void
@@ -92,64 +66,18 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
         }
 }
 
-/**
- * Flushes the current command lists if they reference the given BO.
- *
- * This helps avoid flushing the command buffers when unnecessary.
- */
-bool
-vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo,
-                     bool include_reads)
-{
-        struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
-
-        if (!job->needs_flush)
-                return false;
-
-        /* Walk all the referenced BOs in the drawing command list to see if
-         * they match.
-         */
-        if (include_reads) {
-                struct vc4_bo **referenced_bos = job->bo_pointers.base;
-                for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
-                        if (referenced_bos[i] == bo) {
-                                return true;
-                        }
-                }
-        }
-
-        /* Also check for the Z/color buffers, since the references to those
-         * are only added immediately before submit.
-         */
-        struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
-        if (csurf) {
-                struct vc4_resource *ctex = vc4_resource(csurf->base.texture);
-                if (ctex->bo == bo) {
-                        return true;
-                }
-        }
-
-        struct vc4_surface *zsurf = vc4_surface(vc4->framebuffer.zsbuf);
-        if (zsurf) {
-                struct vc4_resource *ztex =
-                        vc4_resource(zsurf->base.texture);
-                if (ztex->bo == bo) {
-                        return true;
-                }
-        }
-
-        return false;
-}
-
 static void
 vc4_invalidate_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct pipe_surface *zsurf = vc4->framebuffer.zsbuf;
-
-        if (zsurf && zsurf->texture == prsc)
-                vc4->job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
+        struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
+                                                           prsc);
+        if (!entry)
+                return;
+
+        struct vc4_job *job = entry->data;
+        if (job->key.zsbuf && job->key.zsbuf->texture == prsc)
+                job->resolve &= ~(PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL);
 }
 
 static void
@@ -157,6 +85,8 @@ vc4_context_destroy(struct pipe_context *pctx)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
 
+        vc4_flush(pctx);
+
         if (vc4->blitter)
                 util_blitter_destroy(vc4->blitter);
 
@@ -205,8 +135,7 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
         vc4_query_init(pctx);
         vc4_resource_context_init(pctx);
 
-        vc4->job = rzalloc(vc4, struct vc4_job);
-        vc4_job_init(vc4->job);
+        vc4_job_init(vc4);
 
         vc4->fd = screen->fd;
 
index 38dc3a5998c0566ef0610dbec12e4a3e7992ee0a..87d8c79241bf532e5e47b7911a41f8fae0f39742 100644 (file)
@@ -190,6 +190,12 @@ struct vc4_vertex_stateobj {
         unsigned num_elements;
 };
 
+/* Hash table key for vc4->jobs */
+struct vc4_job_key {
+        struct pipe_surface *cbuf;
+        struct pipe_surface *zsbuf;
+};
+
 /**
  * A complete bin/render job.
  *
@@ -266,6 +272,8 @@ struct vc4_job {
          * the current job.
          */
         uint32_t draw_calls_queued;
+
+        struct vc4_job_key key;
 };
 
 struct vc4_context {
@@ -274,9 +282,21 @@ struct vc4_context {
         int fd;
         struct vc4_screen *screen;
 
-        /** The render job for the currently bound FBO. */
+        /** The 3D rendering job for the currently bound FBO. */
         struct vc4_job *job;
 
+        /* Map from struct vc4_job_key to the job for that FBO.
+         */
+        struct hash_table *jobs;
+
+        /**
+         * Map from vc4_resource to a job writing to that resource.
+         *
+         * Primarily for flushing jobs rendering to textures that are now
+         * being read from.
+         */
+        struct hash_table *write_jobs;
+
         struct slab_mempool transfer_pool;
         struct blitter_context *blitter;
 
@@ -404,7 +424,8 @@ void vc4_program_fini(struct pipe_context *pctx);
 void vc4_query_init(struct pipe_context *pctx);
 void vc4_simulator_init(struct vc4_screen *screen);
 int vc4_simulator_flush(struct vc4_context *vc4,
-                        struct drm_vc4_submit_cl *args);
+                        struct drm_vc4_submit_cl *args,
+                        struct vc4_job *job);
 
 void vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader);
 void vc4_write_uniforms(struct vc4_context *vc4,
@@ -413,11 +434,17 @@ void vc4_write_uniforms(struct vc4_context *vc4,
                         struct vc4_texture_stateobj *texstate);
 
 void vc4_flush(struct pipe_context *pctx);
-void vc4_job_init(struct vc4_job *job);
+void vc4_job_init(struct vc4_context *vc4);
+struct vc4_job *vc4_get_job(struct vc4_context *vc4,
+                            struct pipe_surface *cbuf,
+                            struct pipe_surface *zsbuf);
+struct vc4_job *vc4_get_job_for_fbo(struct vc4_context *vc4);
+
 void vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job);
-void vc4_job_reset(struct vc4_job *job);
-bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo,
-                          bool include_reads);
+void vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
+                                     struct pipe_resource *prsc);
+void vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
+                                     struct pipe_resource *prsc);
 void vc4_emit_state(struct pipe_context *pctx);
 void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c);
 struct qpu_reg *vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c);
index 52a53db9a29567ef5f8b4ff01855f1263e8e9694..bbdb02010f6bee154f3395fd5ef0dad3b631bc67 100644 (file)
@@ -116,9 +116,11 @@ vc4_start_draw(struct vc4_context *vc4, int vert_count)
 }
 
 static void
-vc4_update_shadow_textures(struct pipe_context *pctx,
+vc4_predraw_check_textures(struct pipe_context *pctx,
                            struct vc4_texture_stateobj *stage_tex)
 {
+        struct vc4_context *vc4 = vc4_context(pctx);
+
         for (int i = 0; i < stage_tex->num_textures; i++) {
                 struct pipe_sampler_view *view = stage_tex->textures[i];
                 if (!view)
@@ -126,6 +128,8 @@ vc4_update_shadow_textures(struct pipe_context *pctx,
                 struct vc4_resource *rsc = vc4_resource(view->texture);
                 if (rsc->shadow_parent)
                         vc4_update_shadow_baselevel_texture(pctx, view);
+
+                vc4_flush_jobs_writing_resource(vc4, view->texture);
         }
 }
 
@@ -263,12 +267,12 @@ static void
 vc4_hw_2116_workaround(struct pipe_context *pctx)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
+        struct vc4_job *job = vc4_get_job_for_fbo(vc4);
 
         if (job->draw_calls_queued == 0x1ef0) {
                 perf_debug("Flushing batch due to HW-2116 workaround "
                            "(too many draw calls per scene\n");
-                vc4_flush(pctx);
+                vc4_job_submit(vc4, job);
         }
 }
 
@@ -276,7 +280,6 @@ static void
 vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
 
         if (info->mode >= PIPE_PRIM_QUADS) {
                 util_primconvert_save_index_buffer(vc4->primconvert, &vc4->indexbuf);
@@ -288,11 +291,13 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         }
 
         /* Before setting up the draw, do any fixup blits necessary. */
-        vc4_update_shadow_textures(pctx, &vc4->verttex);
-        vc4_update_shadow_textures(pctx, &vc4->fragtex);
+        vc4_predraw_check_textures(pctx, &vc4->verttex);
+        vc4_predraw_check_textures(pctx, &vc4->fragtex);
 
         vc4_hw_2116_workaround(pctx);
 
+        struct vc4_job *job = vc4_get_job_for_fbo(vc4);
+
         vc4_get_draw_cl_space(job, info->count);
 
         if (vc4->prim_mode != info->mode) {
@@ -466,14 +471,15 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
           const union pipe_color_union *color, double depth, unsigned stencil)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
+        struct vc4_job *job = vc4_get_job_for_fbo(vc4);
 
         /* We can't flag new buffers for clearing once we've queued draws.  We
          * could avoid this by using the 3d engine to clear.
          */
         if (job->draw_calls_queued) {
                 perf_debug("Flushing rendering to process new clear.\n");
-                vc4_flush(pctx);
+                vc4_job_submit(vc4, job);
+                job = vc4_get_job_for_fbo(vc4);
         }
 
         /* Clearing ZS will clear both Z and stencil, so if we're trying to
index cb20853fa497abbac0b8b4fd06a4dfbed8dce8a6..d39472ef131c00266d63e7505df32809a17205c1 100644 (file)
 
 #include <xf86drm.h>
 #include "vc4_context.h"
+#include "util/hash_table.h"
 
-void
-vc4_job_init(struct vc4_job *job)
+static void
+remove_from_ht(struct hash_table *ht, void *key)
 {
-        vc4_init_cl(job, &job->bcl);
-        vc4_init_cl(job, &job->shader_rec);
-        vc4_init_cl(job, &job->uniforms);
-        vc4_init_cl(job, &job->bo_handles);
-        vc4_init_cl(job, &job->bo_pointers);
-        vc4_job_reset(job);
+        struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+        _mesa_hash_table_remove(ht, entry);
 }
 
-void
-vc4_job_reset(struct vc4_job *job)
+static void
+vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)
 {
         struct vc4_bo **referenced_bos = job->bo_pointers.base;
         for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
                 vc4_bo_unreference(&referenced_bos[i]);
         }
-        vc4_reset_cl(&job->bcl);
-        vc4_reset_cl(&job->shader_rec);
-        vc4_reset_cl(&job->uniforms);
-        vc4_reset_cl(&job->bo_handles);
-        vc4_reset_cl(&job->bo_pointers);
-        job->shader_rec_count = 0;
 
-        job->needs_flush = false;
-        job->draw_calls_queued = 0;
+        remove_from_ht(vc4->jobs, &job->key);
 
-        job->resolve = 0;
-        job->cleared = 0;
+        if (job->color_write) {
+                remove_from_ht(vc4->write_jobs, job->color_write->texture);
+                pipe_surface_reference(&job->color_write, NULL);
+        }
+        if (job->msaa_color_write) {
+                remove_from_ht(vc4->write_jobs, job->msaa_color_write->texture);
+                pipe_surface_reference(&job->msaa_color_write, NULL);
+        }
+        if (job->zs_write) {
+                remove_from_ht(vc4->write_jobs, job->zs_write->texture);
+                pipe_surface_reference(&job->zs_write, NULL);
+        }
+        if (job->msaa_zs_write) {
+                remove_from_ht(vc4->write_jobs, job->msaa_zs_write->texture);
+                pipe_surface_reference(&job->msaa_zs_write, NULL);
+        }
+
+        pipe_surface_reference(&job->color_read, NULL);
+        pipe_surface_reference(&job->zs_read, NULL);
+
+        if (vc4->job == job)
+                vc4->job = NULL;
+
+        ralloc_free(job);
+}
+
+static struct vc4_job *
+vc4_job_create(struct vc4_context *vc4)
+{
+        struct vc4_job *job = rzalloc(vc4, struct vc4_job);
+
+        vc4_init_cl(job, &job->bcl);
+        vc4_init_cl(job, &job->shader_rec);
+        vc4_init_cl(job, &job->uniforms);
+        vc4_init_cl(job, &job->bo_handles);
+        vc4_init_cl(job, &job->bo_pointers);
 
         job->draw_min_x = ~0;
         job->draw_min_y = ~0;
         job->draw_max_x = 0;
         job->draw_max_y = 0;
 
-        pipe_surface_reference(&job->color_write, NULL);
-        pipe_surface_reference(&job->color_read, NULL);
-        pipe_surface_reference(&job->msaa_color_write, NULL);
-        pipe_surface_reference(&job->zs_write, NULL);
-        pipe_surface_reference(&job->zs_read, NULL);
-        pipe_surface_reference(&job->msaa_zs_write, NULL);
+        return job;
+}
+
+void
+vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
+                                struct pipe_resource *prsc)
+{
+        struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
+                                                           prsc);
+        if (entry) {
+                struct vc4_job *job = entry->data;
+                vc4_job_submit(vc4, job);
+        }
+}
+
+void
+vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
+                                struct pipe_resource *prsc)
+{
+        struct vc4_resource *rsc = vc4_resource(prsc);
+
+        vc4_flush_jobs_writing_resource(vc4, prsc);
+
+        struct hash_entry *entry;
+        hash_table_foreach(vc4->jobs, entry) {
+                struct vc4_job *job = entry->data;
+
+                struct vc4_bo **referenced_bos = job->bo_pointers.base;
+                for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
+                        if (referenced_bos[i] == rsc->bo) {
+                                vc4_job_submit(vc4, job);
+                                continue;
+                        }
+                }
+
+                /* Also check for the Z/color buffers, since the references to
+                 * those are only added immediately before submit.
+                 */
+                if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {
+                        struct vc4_resource *ctex =
+                                vc4_resource(job->color_read->texture);
+                        if (ctex->bo == rsc->bo) {
+                                vc4_job_submit(vc4, job);
+                                continue;
+                        }
+                }
+
+                if (job->zs_read && !(job->cleared &
+                                      (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
+                        struct vc4_resource *ztex =
+                                vc4_resource(job->zs_read->texture);
+                        if (ztex->bo == rsc->bo) {
+                                vc4_job_submit(vc4, job);
+                                continue;
+                        }
+                }
+        }
+}
+
+/**
+ * Returns a vc4_job structure for tracking V3D rendering to a particular FBO.
+ *
+ * If we've already started rendering to this FBO, then return the same job,
+ * otherwise make a new one.  If we're beginning rendering to an FBO, make
+ * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
+ * have been flushed.
+ */
+struct vc4_job *
+vc4_get_job(struct vc4_context *vc4,
+            struct pipe_surface *cbuf, struct pipe_surface *zsbuf)
+{
+        /* Return the existing job for this FBO if we have one */
+        struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};
+        struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,
+                                                           &local_key);
+        if (entry)
+                return entry->data;
+
+        /* Creating a new job.  Make sure that any previous jobs reading or
+         * writing these buffers are flushed.
+         */
+        if (cbuf)
+                vc4_flush_jobs_reading_resource(vc4, cbuf->texture);
+        if (zsbuf)
+                vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);
+
+        struct vc4_job *job = vc4_job_create(vc4);
+
+        if (cbuf) {
+                if (cbuf->texture->nr_samples > 1) {
+                        job->msaa = true;
+                        pipe_surface_reference(&job->msaa_color_write, cbuf);
+                } else {
+                        pipe_surface_reference(&job->color_write, cbuf);
+                }
+        }
+
+        if (zsbuf) {
+                if (zsbuf->texture->nr_samples > 1) {
+                        job->msaa = true;
+                        pipe_surface_reference(&job->msaa_zs_write, zsbuf);
+                } else {
+                        pipe_surface_reference(&job->zs_write, zsbuf);
+                }
+        }
+
+        if (job->msaa) {
+                job->tile_width = 32;
+                job->tile_height = 32;
+        } else {
+                job->tile_width = 64;
+                job->tile_height = 64;
+        }
+
+        if (cbuf)
+                _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);
+        if (zsbuf)
+                _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);
+
+        job->key.cbuf = cbuf;
+        job->key.zsbuf = zsbuf;
+        _mesa_hash_table_insert(vc4->jobs, &job->key, job);
+
+        return job;
+}
+
+struct vc4_job *
+vc4_get_job_for_fbo(struct vc4_context *vc4)
+{
+        if (vc4->job)
+                return vc4->job;
+
+        struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
+        struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
+        struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);
+
+        /* The dirty flags are tracking what's been updated while vc4->job has
+         * been bound, so set them all to ~0 when switching between jobs.  We
+         * also need to reset all state at the start of rendering.
+         */
+        vc4->dirty = ~0;
+
+        /* Set up the read surfaces in the job.  If they aren't actually
+         * getting read (due to a clear starting the frame), job->cleared will
+         * mask out the read.
+         */
+        pipe_surface_reference(&job->color_read, cbuf);
+        pipe_surface_reference(&job->zs_read, zsbuf);
+
+        /* If we're binding to uninitialized buffers, no need to load their
+         * contents before drawing.
+         */
+        if (cbuf) {
+                struct vc4_resource *rsc = vc4_resource(cbuf->texture);
+                if (!rsc->writes)
+                        job->cleared |= PIPE_CLEAR_COLOR0;
+        }
+
+        if (zsbuf) {
+                struct vc4_resource *rsc = vc4_resource(zsbuf->texture);
+                if (!rsc->writes)
+                        job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
+        }
+
+        job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,
+                                         job->tile_width);
+        job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,
+                                         job->tile_height);
+
+        vc4->job = job;
+
+        return job;
 }
 
 static void
@@ -166,15 +356,14 @@ void
 vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
 {
         if (!job->needs_flush)
-                return;
+                goto done;
 
         /* The RCL setup would choke if the draw bounds cause no drawing, so
          * just drop the drawing if that's the case.
          */
         if (job->draw_max_x <= job->draw_min_x ||
             job->draw_max_y <= job->draw_min_y) {
-                vc4_job_reset(job);
-                return;
+                goto done;
         }
 
         if (vc4_debug & VC4_DEBUG_CL) {
@@ -275,7 +464,7 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
 #ifndef USE_VC4_SIMULATOR
                 ret = drmIoctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
 #else
-                ret = vc4_simulator_flush(vc4, &submit);
+                ret = vc4_simulator_flush(vc4, &submit, job);
 #endif
                 static bool warned = false;
                 if (ret && !warned) {
@@ -304,5 +493,30 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
                 }
         }
 
-        vc4_job_reset(vc4->job);
+done:
+        vc4_job_free(vc4, job);
+}
+
+static bool
+vc4_job_compare(const void *a, const void *b)
+{
+        return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;
+}
+
+static uint32_t
+vc4_job_hash(const void *key)
+{
+        return _mesa_hash_data(key, sizeof(struct vc4_job_key));
+}
+
+void
+vc4_job_init(struct vc4_context *vc4)
+{
+        vc4->jobs = _mesa_hash_table_create(vc4,
+                                            vc4_job_hash,
+                                            vc4_job_compare);
+        vc4->write_jobs = _mesa_hash_table_create(vc4,
+                                                  _mesa_hash_pointer,
+                                                  _mesa_key_pointer_equal);
 }
+
index 12469ea192e8210fd464a1d52e594b8a7f137b8f..bfa8f40ba13f90a26be5fa32fb061efdbefd783b 100644 (file)
@@ -115,7 +115,6 @@ vc4_resource_transfer_unmap(struct pipe_context *pctx,
                 blit.filter = PIPE_TEX_FILTER_NEAREST;
 
                 pctx->blit(pctx, &blit);
-                vc4_flush(pctx);
 
                 pipe_resource_reference(&trans->ss_resource, NULL);
         }
@@ -178,20 +177,20 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
                         if (prsc->bind & PIPE_BIND_VERTEX_BUFFER)
                                 vc4->dirty |= VC4_DIRTY_VTXBUF;
                 } else {
-                        /* If we failed to reallocate, flush everything so
-                         * that we don't violate any syncing requirements.
+                        /* If we failed to reallocate, flush users so that we
+                         * don't violate any syncing requirements.
                          */
-                        vc4_flush(pctx);
+                        vc4_flush_jobs_reading_resource(vc4, prsc);
                 }
         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                 /* If we're writing and the buffer is being used by the CL, we
                  * have to flush the CL first.  If we're only reading, we need
                  * to flush if the CL has written our buffer.
                  */
-                if (vc4_cl_references_bo(pctx, rsc->bo,
-                                         usage & PIPE_TRANSFER_WRITE)) {
-                        vc4_flush(pctx);
-                }
+                if (usage & PIPE_TRANSFER_WRITE)
+                        vc4_flush_jobs_reading_resource(vc4, prsc);
+                else
+                        vc4_flush_jobs_writing_resource(vc4, prsc);
         }
 
         if (usage & PIPE_TRANSFER_WRITE)
@@ -245,7 +244,7 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
                         blit.filter = PIPE_TEX_FILTER_NEAREST;
 
                         pctx->blit(pctx, &blit);
-                        vc4_flush(pctx);
+                        vc4_flush_jobs_writing_resource(vc4, blit.dst.resource);
                 }
 
                 /* The rest of the mapping process should use our temporary. */
index 81d34761b41eb0b8be3d34cab18dea36a5661d3d..b802391aa6e5d89e48902c9d27626baa34cf32f8 100644 (file)
@@ -74,11 +74,10 @@ drm_gem_cma_create(struct drm_device *dev, size_t size)
 }
 
 static int
-vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_exec_info *exec)
+vc4_simulator_pin_bos(struct drm_device *dev, struct vc4_job *job,
+                      struct vc4_exec_info *exec)
 {
         struct drm_vc4_submit_cl *args = exec->args;
-        struct vc4_context *vc4 = dev->vc4;
-        struct vc4_job *job = vc4->job;
         struct vc4_bo **bos = job->bo_pointers.base;
 
         exec->bo_count = args->bo_handle_count;
@@ -220,7 +219,8 @@ vc4_dump_to_file(struct vc4_exec_info *exec)
 }
 
 int
-vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
+vc4_simulator_flush(struct vc4_context *vc4,
+                    struct drm_vc4_submit_cl *args, struct vc4_job *job)
 {
         struct vc4_screen *screen = vc4->screen;
         struct vc4_surface *csurf = vc4_surface(vc4->framebuffer.cbufs[0]);
@@ -257,7 +257,7 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
 
         exec.args = args;
 
-        ret = vc4_simulator_pin_bos(dev, &exec);
+        ret = vc4_simulator_pin_bos(dev, job, &exec);
         if (ret)
                 return ret;
 
index d697f7c87ffe6c13de8dffbb555e1f644c6090c4..12471589510d221ae532d5db4f2b68f588454bb3 100644 (file)
@@ -406,11 +406,10 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
                           const struct pipe_framebuffer_state *framebuffer)
 {
         struct vc4_context *vc4 = vc4_context(pctx);
-        struct vc4_job *job = vc4->job;
         struct pipe_framebuffer_state *cso = &vc4->framebuffer;
         unsigned i;
 
-        vc4_flush(pctx);
+        vc4->job = NULL;
 
         for (i = 0; i < framebuffer->nr_cbufs; i++)
                 pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]);
@@ -424,23 +423,6 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
         cso->width = framebuffer->width;
         cso->height = framebuffer->height;
 
-        /* If we're binding to uninitialized buffers, no need to load their
-         * contents before drawing..
-         */
-        if (cso->cbufs[0]) {
-                struct vc4_resource *rsc =
-                        vc4_resource(cso->cbufs[0]->texture);
-                if (!rsc->writes)
-                        job->cleared |= PIPE_CLEAR_COLOR0;
-        }
-
-        if (cso->zsbuf) {
-                struct vc4_resource *rsc =
-                        vc4_resource(cso->zsbuf->texture);
-                if (!rsc->writes)
-                        job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
-        }
-
         /* Nonzero texture mipmap levels are laid out as if they were in
          * power-of-two-sized spaces.  The renderbuffer config infers its
          * stride from the width parameter, so we need to configure our
@@ -461,22 +443,6 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
                          rsc->cpp);
         }
 
-        job->msaa = false;
-        if (cso->cbufs[0])
-                job->msaa = cso->cbufs[0]->texture->nr_samples > 1;
-        else if (cso->zsbuf)
-                job->msaa = cso->zsbuf->texture->nr_samples > 1;
-
-        if (job->msaa) {
-                job->tile_width = 32;
-                job->tile_height = 32;
-        } else {
-                job->tile_width = 64;
-                job->tile_height = 64;
-        }
-        job->draw_tiles_x = DIV_ROUND_UP(cso->width, job->tile_width);
-        job->draw_tiles_y = DIV_ROUND_UP(cso->height, job->tile_height);
-
         vc4->dirty |= VC4_DIRTY_FRAMEBUFFER;
 }