v3d: Sync on last CS when non-compute stage uses resource written by CS
authorJose Maria Casanova Crespo <jmcasanova@igalia.com>
Mon, 11 Nov 2019 00:46:24 +0000 (01:46 +0100)
committerMarge Bot <eric+marge@anholt.net>
Tue, 25 Feb 2020 11:41:44 +0000 (11:41 +0000)
When a resource is written by a compute shader and then used by a
non-compute stage we sync on last compute job to guarantee that the
resource has been completely written when the next stage reads resources.

In the other cases how flushes are done guarantee the serialization of
the writes and reads.

To reproduce the failure the following tests should be executed in batch
as last test don't fail when run isolated:

KHR-GLES31.core.shader_image_load_store.basic-allFormats-load-fs
KHR-GLES31.core.shader_image_load_store.basic-allFormats-loadStoreComputeStage
KHR-GLES31.core.shader_image_load_store.basic-allTargets-load-cs
KHR-GLES31.core.shader_image_load_store.advanced-sync-vertexArray

v2: Use fence dep instead of bo_wait (Eric Anholt)
v3: Rename struct names (Iago Toral)
    Document why is not needed on graphics->compute case. (Iago Toral)
    Follow same code pattern of the other update of in_sync_bcl.
v4: Fixed comments style. (Iago Toral)

Fixes KHR-GLES31.core.shader_image_load_store.advanced-sync-vertexArray

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
CC: 19.3 20.0 <mesa-stable@lists.freedesktop.org>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2700>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2700>

src/gallium/drivers/v3d/v3d_blit.c
src/gallium/drivers/v3d/v3d_context.h
src/gallium/drivers/v3d/v3d_job.c
src/gallium/drivers/v3d/v3d_resource.c
src/gallium/drivers/v3d/v3d_resource.h
src/gallium/drivers/v3d/v3dx_draw.c

index c9614fc092ab3f1681a387a68d63c1190be2b9e5..d1e895688bc9f15dcbb5c408e8172773065f3d19 100644 (file)
@@ -128,7 +128,7 @@ v3d_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
         struct pipe_surface *src_surf =
                 v3d_get_blit_surface(pctx, info->src.resource, info->src.level);
 
-        v3d_flush_jobs_reading_resource(v3d, info->src.resource);
+        v3d_flush_jobs_reading_resource(v3d, info->src.resource, false);
 
         struct v3d_job *job = v3d_get_job(v3d, dst_surf, NULL);
         pipe_surface_reference(&job->color_read, src_surf);
@@ -381,8 +381,8 @@ v3d_tfu(struct pipe_context *pctx,
         if (dst_base_slice->tiling == VC5_TILING_RASTER)
                 return false;
 
-        v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT);
-        v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT);
+        v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false);
+        v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false);
 
         struct drm_v3d_submit_tfu tfu = {
                 .ios = (height << 16) | width,
@@ -539,5 +539,5 @@ v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
          * texture uploads before using the textures.
          */
         v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
-                                        V3D_FLUSH_DEFAULT);
+                                        V3D_FLUSH_DEFAULT, false);
 }
index e3a5be5fea2fce3f8d6a3651bdd9a431839d0da7..3a18a0c4fc4a3e3086a1b3abf9dc15ddc84ba219 100644 (file)
@@ -519,6 +519,12 @@ struct v3d_context {
 
         bool active_queries;
 
+        /**
+         * If a compute job writes a resource read by a non-compute stage we
+         * should sync on the last compute job.
+         */
+        bool sync_on_last_compute_job;
+
         uint32_t tf_prims_generated;
         uint32_t prims_generated;
 
@@ -653,10 +659,12 @@ void v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job);
 void v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo);
 void v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                      struct pipe_resource *prsc,
-                                     enum v3d_flush_cond flush_cond);
+                                     enum v3d_flush_cond flush_cond,
+                                     bool is_compute_pipeline);
 void v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
                                      struct pipe_resource *prsc,
-                                     enum v3d_flush_cond flush_cond);
+                                     enum v3d_flush_cond flush_cond,
+                                     bool is_compute_pipeline);
 void v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode);
 void v3d_update_compiled_cs(struct v3d_context *v3d);
 
index 32c1157c06c650defe1857cfd5cddaf458fd4df3..58410484d3f93a44350564847f85fae814dd3ec7 100644 (file)
@@ -184,10 +184,23 @@ v3d_job_writes_resource_from_tf(struct v3d_job *job,
 void
 v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                 struct pipe_resource *prsc,
-                                enum v3d_flush_cond flush_cond)
+                                enum v3d_flush_cond flush_cond,
+                                bool is_compute_pipeline)
 {
         struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
                                                            prsc);
+        struct v3d_resource *rsc = v3d_resource(prsc);
+
+        /* We need to sync if graphics pipeline reads a resource written
+         * by the compute pipeline. The same would be needed for the case of
+         * graphics-compute dependency but nowadays all compute jobs
+         * are serialized with the previous submitted job.
+         */
+        if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
+           v3d->sync_on_last_compute_job = true;
+           rsc->compute_written = false;
+        }
+
         if (!entry)
                 return;
 
@@ -220,7 +233,8 @@ v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
 void
 v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
                                 struct pipe_resource *prsc,
-                                enum v3d_flush_cond flush_cond)
+                                enum v3d_flush_cond flush_cond,
+                                bool is_compute_pipeline)
 {
         struct v3d_resource *rsc = v3d_resource(prsc);
 
@@ -230,7 +244,8 @@ v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
          * caller intends to write to the resource, so we don't care if
          * there was a previous TF write to it.
          */
-        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond);
+        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
+                                        is_compute_pipeline);
 
         hash_table_foreach(v3d->jobs, entry) {
                 struct v3d_job *job = entry->data;
@@ -329,7 +344,8 @@ v3d_get_job(struct v3d_context *v3d,
         for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) {
                 if (cbufs[i]) {
                         v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                         pipe_surface_reference(&job->cbufs[i], cbufs[i]);
 
                         if (cbufs[i]->texture->nr_samples > 1)
@@ -338,7 +354,8 @@ v3d_get_job(struct v3d_context *v3d,
         }
         if (zsbuf) {
                 v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
-                                                V3D_FLUSH_DEFAULT);
+                                                V3D_FLUSH_DEFAULT,
+                                                false);
                 pipe_surface_reference(&job->zsbuf, zsbuf);
                 if (zsbuf->texture->nr_samples > 1)
                         job->msaa = true;
@@ -356,7 +373,8 @@ v3d_get_job(struct v3d_context *v3d,
                 if (rsc->separate_stencil) {
                         v3d_flush_jobs_reading_resource(v3d,
                                                         &rsc->separate_stencil->base,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                         _mesa_hash_table_insert(v3d->write_jobs,
                                                 &rsc->separate_stencil->base,
                                                 job);
index cd8af3a7be8b6f2fe7aba4996e75bb374339e7eb..01bf803beefe561a768e431c8d9354b2a1d910a0 100644 (file)
@@ -170,19 +170,23 @@ v3d_map_usage_prep(struct pipe_context *pctx,
                          * don't violate any syncing requirements.
                          */
                         v3d_flush_jobs_reading_resource(v3d, prsc,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                 }
         } else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
                 /* If we're writing and the buffer is being used by the CL, we
                  * have to flush the CL first.  If we're only reading, we need
                  * to flush if the CL has written our buffer.
                  */
-                if (usage & PIPE_TRANSFER_WRITE)
+                if (usage & PIPE_TRANSFER_WRITE) {
                         v3d_flush_jobs_reading_resource(v3d, prsc,
-                                                        V3D_FLUSH_ALWAYS);
-                else
+                                                        V3D_FLUSH_ALWAYS,
+                                                        false);
+                } else {
                         v3d_flush_jobs_writing_resource(v3d, prsc,
-                                                        V3D_FLUSH_ALWAYS);
+                                                        V3D_FLUSH_ALWAYS,
+                                                        false);
+                }
         }
 
         if (usage & PIPE_TRANSFER_WRITE) {
index f0b7c26e04886444a6c3dda70f61454a1c496dfb..812131dddf28617bf5f9c7fdeb2582cc9efb4451 100644 (file)
@@ -129,6 +129,11 @@ struct v3d_resource {
         int cpp;
         bool tiled;
 
+        /**
+         * Indicates if the CS has written the resource
+         */
+        bool compute_written;
+
         /**
          * Number of times the resource has been written to.
          *
index a0714d4a02efd7aee702cf38d482a4365ffbf899..9bbfe4537e72a956a550847a7f3cf1a4ee7fe60b 100644 (file)
@@ -172,7 +172,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                         v3d_update_shadow_texture(pctx, &view->base);
 
                 v3d_flush_jobs_writing_resource(v3d, view->texture,
-                                                V3D_FLUSH_DEFAULT);
+                                                V3D_FLUSH_DEFAULT,
+                                                s == PIPE_SHADER_COMPUTE);
         }
 
         /* Flush writes to UBOs. */
@@ -180,7 +181,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                 struct pipe_constant_buffer *cb = &v3d->constbuf[s].cb[i];
                 if (cb->buffer) {
                         v3d_flush_jobs_writing_resource(v3d, cb->buffer,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        s == PIPE_SHADER_COMPUTE);
                 }
         }
 
@@ -189,7 +191,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                 struct pipe_shader_buffer *sb = &v3d->ssbo[s].sb[i];
                 if (sb->buffer) {
                         v3d_flush_jobs_reading_resource(v3d, sb->buffer,
-                                                        V3D_FLUSH_NOT_CURRENT_JOB);
+                                                        V3D_FLUSH_NOT_CURRENT_JOB,
+                                                        s == PIPE_SHADER_COMPUTE);
                 }
         }
 
@@ -198,7 +201,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                 struct v3d_image_view *view = &v3d->shaderimg[s].si[i];
 
                 v3d_flush_jobs_reading_resource(v3d, view->base.resource,
-                                                V3D_FLUSH_NOT_CURRENT_JOB);
+                                                V3D_FLUSH_NOT_CURRENT_JOB,
+                                                s == PIPE_SHADER_COMPUTE);
         }
 
         /* Flush writes to our vertex buffers (i.e. from transform feedback) */
@@ -207,7 +211,8 @@ v3d_predraw_check_stage_inputs(struct pipe_context *pctx,
                         struct pipe_vertex_buffer *vb = &v3d->vertexbuf.vb[i];
 
                         v3d_flush_jobs_writing_resource(v3d, vb->buffer.resource,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                 }
         }
 }
@@ -228,7 +233,8 @@ v3d_predraw_check_outputs(struct pipe_context *pctx)
                         const struct pipe_stream_output_target *target =
                                 so->targets[i];
                         v3d_flush_jobs_reading_resource(v3d, target->buffer,
-                                                        V3D_FLUSH_DEFAULT);
+                                                        V3D_FLUSH_DEFAULT,
+                                                        false);
                 }
         }
 }
@@ -1121,7 +1127,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 
         if (info->indirect) {
                 v3d_flush_jobs_writing_resource(v3d, info->indirect->buffer,
-                                                V3D_FLUSH_DEFAULT);
+                                                V3D_FLUSH_DEFAULT, false);
         }
 
         v3d_predraw_check_outputs(pctx);
@@ -1153,6 +1159,14 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
                 job->submit.in_sync_bcl = v3d->out_sync;
         }
 
+        /* We also need to ensure that compute is complete when render depends
+         * on resources written by it.
+         */
+        if (v3d->sync_on_last_compute_job) {
+                job->submit.in_sync_bcl = v3d->out_sync;
+                v3d->sync_on_last_compute_job = false;
+        }
+
         /* Mark SSBOs and images as being written.  We don't actually know
          * which ones are read vs written, so just assume the worst.
          */
@@ -1575,13 +1589,15 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
         foreach_bit(i, v3d->ssbo[PIPE_SHADER_COMPUTE].enabled_mask) {
                 struct v3d_resource *rsc = v3d_resource(
                         v3d->ssbo[PIPE_SHADER_COMPUTE].sb[i].buffer);
-                rsc->writes++; /* XXX */
+                rsc->writes++;
+                rsc->compute_written = true;
         }
 
         foreach_bit(i, v3d->shaderimg[PIPE_SHADER_COMPUTE].enabled_mask) {
                 struct v3d_resource *rsc = v3d_resource(
                         v3d->shaderimg[PIPE_SHADER_COMPUTE].si[i].base.resource);
                 rsc->writes++;
+                rsc->compute_written = true;
         }
 
         v3d_bo_unreference(&uniforms.bo);