v3d: do not automatically flush current job for SSBOs and shader images
author Iago Toral Quiroga <itoral@igalia.com>
Mon, 12 Aug 2019 06:36:37 +0000 (08:36 +0200)
committer Iago Toral Quiroga <itoral@igalia.com>
Tue, 13 Aug 2019 06:25:15 +0000 (08:25 +0200)
If the current job has a sequence of draw calls involving SSBOs and/or
shader images, we would flush the job in between each draw call.
With this change, we won't flush the current job and we rely on the
application inserting correct barriers by issuing glMemoryBarrier()
when needed.

v2 (Eric):
 - When mapping a buffer for writing, we always need to flush.

Reviewed-by: Eric Anholt <eric@anholt.net>
src/gallium/drivers/v3d/v3d_blit.c
src/gallium/drivers/v3d/v3d_context.h
src/gallium/drivers/v3d/v3d_job.c
src/gallium/drivers/v3d/v3d_resource.c
src/gallium/drivers/v3d/v3dx_draw.c

index 0da19ce380dea62fbc5efdbff80c44d66082c5e0..6c69f00b704f60b5820c43ab7494d7c9d68ee564 100644 (file)
@@ -380,8 +380,8 @@ v3d_tfu(struct pipe_context *pctx,
         if (dst_base_slice->tiling == VC5_TILING_RASTER)
                 return false;
 
-        v3d_flush_jobs_writing_resource(v3d, psrc, false);
-        v3d_flush_jobs_reading_resource(v3d, pdst);
+        v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT);
+        v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT);
 
         struct drm_v3d_submit_tfu tfu = {
                 .ios = (height << 16) | width,
@@ -537,5 +537,6 @@ v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
          * run into unexpected OOMs when blits are used for a large series of
          * texture uploads before using the textures.
          */
-        v3d_flush_jobs_writing_resource(v3d, info.dst.resource, false);
+        v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
+                                        V3D_FLUSH_DEFAULT);
 }
index b2c917df2409d418efb3e74c9a4c0d995c5987d8..133a3fcf09506cd8477e723e83750aaf658fa5f9 100644 (file)
@@ -117,6 +117,23 @@ enum v3d_sampler_state_variant {
         V3D_SAMPLER_STATE_VARIANT_COUNT,
 };
 
+enum v3d_flush_cond {
+        /* Flush job unless we are flushing for transform feedback, where we
+         * handle flushing in the driver via the 'Wait for TF' packet.
+         */
+        V3D_FLUSH_DEFAULT,
+        /* Always flush the job, even for cases where we would normally not
+         * do it, such as transform feedback.
+         */
+        V3D_FLUSH_ALWAYS,
+        /* Flush job if it is not the current FBO job. This is intended to
+         * skip automatic flushes of the current job for resources that we
+         * expect to be externally synchronized by the application using
+         * glMemoryBarrier(), such as SSBOs and shader images.
+         */
+        V3D_FLUSH_NOT_CURRENT_JOB,
+};
+
 struct v3d_sampler_view {
         struct pipe_sampler_view base;
         uint32_t p0;
@@ -615,9 +632,10 @@ void v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job);
 void v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo);
 void v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                      struct pipe_resource *prsc,
-                                     bool always_flush);
+                                     enum v3d_flush_cond flush_cond);
 void v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
-                                     struct pipe_resource *prsc);
+                                     struct pipe_resource *prsc,
+                                     enum v3d_flush_cond flush_cond);
 void v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode);
 void v3d_update_compiled_cs(struct v3d_context *v3d);
 
index 17fc41d317ab76c11392475ef037552928337b2f..50ea7ef859c8acf00a48c65b1a780cec738c5d5d 100644 (file)
@@ -184,7 +184,7 @@ v3d_job_writes_resource_from_tf(struct v3d_job *job,
 void
 v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                 struct pipe_resource *prsc,
-                                bool always_flush)
+                                enum v3d_flush_cond flush_cond)
 {
         struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
                                                            prsc);
@@ -193,21 +193,23 @@ v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
 
         struct v3d_job *job = entry->data;
 
-        /* For writes from TF in the same job we use the "Wait for TF"
-         * feature provided by the hardware so we don't want to flush.
-         * The exception to this is when the caller is about to map the
-         * resource since in that case we don't have a 'Wait for TF' command
-         * the in command stream. In this scenario the caller is expected
-         * to set 'always_flush' to True.
-         */
         bool needs_flush;
-        if (always_flush) {
-                needs_flush = true;
-        } else if (!v3d->job || v3d->job != job) {
-                /* Write from a different job: always flush */
+        switch (flush_cond) {
+        case V3D_FLUSH_ALWAYS:
                 needs_flush = true;
-        } else {
-                /* Write from currrent job: flush if not TF */
+                break;
+        case V3D_FLUSH_NOT_CURRENT_JOB:
+                needs_flush = !v3d->job || v3d->job != job;
+                break;
+        case V3D_FLUSH_DEFAULT:
+        default:
+                /* For writes from TF in the same job we use the "Wait for TF"
+                 * feature provided by the hardware so we don't want to flush.
+                 * The exception to this is when the caller is about to map the
+                 * resource since in that case we don't have a 'Wait for TF'
+                 * command in the command stream. In this scenario the caller
+                 * is expected to pass V3D_FLUSH_ALWAYS as 'flush_cond'.
+                 */
                 needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
         }
 
@@ -217,7 +219,8 @@ v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
 
 void
 v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
-                                struct pipe_resource *prsc)
+                                struct pipe_resource *prsc,
+                                enum v3d_flush_cond flush_cond)
 {
         struct v3d_resource *rsc = v3d_resource(prsc);
 
@@ -227,18 +230,32 @@ v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
          * caller intends to write to the resource, so we don't care if
          * there was a previous TF write to it.
          */
-        v3d_flush_jobs_writing_resource(v3d, prsc, false);
+        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond);
 
         hash_table_foreach(v3d->jobs, entry) {
                 struct v3d_job *job = entry->data;
 
-                if (_mesa_set_search(job->bos, rsc->bo)) {
-                        v3d_job_submit(v3d, job);
-                        /* Reminder: v3d->jobs is safe to keep iterating even
-                         * after deletion of an entry.
-                         */
+                if (!_mesa_set_search(job->bos, rsc->bo))
                         continue;
+
+                bool needs_flush;
+                switch (flush_cond) {
+                case V3D_FLUSH_NOT_CURRENT_JOB:
+                        needs_flush = !v3d->job || v3d->job != job;
+                        break;
+                case V3D_FLUSH_ALWAYS:
+                case V3D_FLUSH_DEFAULT:
+                default:
+                        needs_flush = true;
                 }
+
+                if (needs_flush)
+                        v3d_job_submit(v3d, job);
+
+                /* Reminder: v3d->jobs is safe to keep iterating even
+                 * after deletion of an entry.
+                 */
+                continue;
         }
 }
 
@@ -311,7 +328,8 @@ v3d_get_job(struct v3d_context *v3d,
 
         for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                 if (cbufs[i]) {
-                        v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture);
+                        v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
+                                                        V3D_FLUSH_DEFAULT);
                         pipe_surface_reference(&job->cbufs[i], cbufs[i]);
 
                         if (cbufs[i]->texture->nr_samples > 1)
@@ -319,7 +337,8 @@ v3d_get_job(struct v3d_context *v3d,
                 }
         }
         if (zsbuf) {
-                v3d_flush_jobs_reading_resource(v3d, zsbuf->texture);
+                v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
+                                                V3D_FLUSH_DEFAULT);
                 pipe_surface_reference(&job->zsbuf, zsbuf);
                 if (zsbuf->texture->nr_samples > 1)
                         job->msaa = true;
@@ -336,7 +355,8 @@ v3d_get_job(struct v3d_context *v3d,
                 struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                 if (rsc->separate_stencil) {
                         v3d_flush_jobs_reading_resource(v3d,
-                                                        &rsc->separate_stencil->base);
+                                                        &rsc->separate_stencil->base,
+                                                        V3D_FLUSH_DEFAULT);
                         _mesa_hash_table_insert(v3d->write_jobs,
                                                 &rsc->separate_stencil->base,
                                                 job);
index 30c4c2fc81268fe46b0cc90e813f9646828cc832..064709dff123678ac0e3e2264c9d04798fafa021 100644 (file)
@@ -169,7 +169,8 @@ v3d_map_usage_prep(struct pipe_context *pctx,
                         /* If we failed to reallocate, flush users so that we
                          * don't violate any syncing requirements.
                          */
-                        v3d_flush_jobs_reading_resource(v3d, prsc);
+                        v3d_flush_jobs_reading_resource(v3d, prsc,
+                                                        V3D_FLUSH_DEFAULT);
                 }
         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                 /* If we're writing and the buffer is being used by the CL, we
@@ -177,9 +178,11 @@ v3d_map_usage_prep(struct pipe_context *pctx,
                  * to flush if the CL has written our buffer.
                  */
                 if (usage & PIPE_TRANSFER_WRITE)
-                        v3d_flush_jobs_reading_resource(v3d, prsc);
+                        v3d_flush_jobs_reading_resource(v3d, prsc,
+                                                        V3D_FLUSH_ALWAYS);
                 else
-                        v3d_flush_jobs_writing_resource(v3d, prsc, true);
+                        v3d_flush_jobs_writing_resource(v3d, prsc,
+                                                        V3D_FLUSH_ALWAYS);
         }
 
         if (usage & PIPE_TRANSFER_WRITE) {
index 00cbc984e4d59a648eaac55db1ab3b15c6a8c726..26f706a9347c78b81c745c29925ba2cf0efb2a78 100644 (file)
@@ -156,28 +156,34 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                     view->base.format != PIPE_FORMAT_X32_S8X24_UINT)
                         v3d_update_shadow_texture(pctx, &view->base);
 
-                v3d_flush_jobs_writing_resource(v3d, view->texture, false);
+                v3d_flush_jobs_writing_resource(v3d, view->texture,
+                                                V3D_FLUSH_DEFAULT);
         }
 
         /* Flush writes to UBOs. */
         foreach_bit(i, v3d->constbuf[s].enabled_mask) {
                 struct pipe_constant_buffer *cb = &v3d->constbuf[s].cb[i];
-                if (cb->buffer)
-                        v3d_flush_jobs_writing_resource(v3d, cb->buffer, false);
+                if (cb->buffer) {
+                        v3d_flush_jobs_writing_resource(v3d, cb->buffer,
+                                                        V3D_FLUSH_DEFAULT);
+                }
         }
 
         /* Flush reads/writes to our SSBOs */
         foreach_bit(i, v3d->ssbo[s].enabled_mask) {
                 struct pipe_shader_buffer *sb = &v3d->ssbo[s].sb[i];
-                if (sb->buffer)
-                        v3d_flush_jobs_reading_resource(v3d, sb->buffer);
+                if (sb->buffer) {
+                        v3d_flush_jobs_reading_resource(v3d, sb->buffer,
+                                                        V3D_FLUSH_NOT_CURRENT_JOB);
+                }
         }
 
         /* Flush reads/writes to our image views */
         foreach_bit(i, v3d->shaderimg[s].enabled_mask) {
                 struct v3d_image_view *view = &v3d->shaderimg[s].si[i];
 
-                v3d_flush_jobs_reading_resource(v3d, view->base.resource);
+                v3d_flush_jobs_reading_resource(v3d, view->base.resource,
+                                                V3D_FLUSH_NOT_CURRENT_JOB);
         }
 
         /* Flush writes to our vertex buffers (i.e. from transform feedback) */
@@ -186,7 +192,7 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                         struct pipe_vertex_buffer *vb = &v3d->vertexbuf.vb[i];
 
                         v3d_flush_jobs_writing_resource(v3d, vb->buffer.resource,
-                                                        false);
+                                                        V3D_FLUSH_DEFAULT);
                 }
         }
 }
@@ -206,7 +212,8 @@ v3d_predraw_check_outputs(struct pipe_context *pctx)
 
                         const struct pipe_stream_output_target *target =
                                 so->targets[i];
-                        v3d_flush_jobs_reading_resource(v3d, target->buffer);
+                        v3d_flush_jobs_reading_resource(v3d, target->buffer,
+                                                        V3D_FLUSH_DEFAULT);
                 }
         }
 }
@@ -657,7 +664,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 
         if (info->indirect) {
                 v3d_flush_jobs_writing_resource(v3d, info->indirect->buffer,
-                                                false);
+                                                V3D_FLUSH_DEFAULT);
         }
 
         v3d_predraw_check_outputs(pctx);