radeonsi: fix ARB_transform_feedback_overflow_query on <= VI
author: Nicolai Hähnle <nicolai.haehnle@amd.com>
Fri, 25 Aug 2017 14:19:56 +0000 (16:19 +0200)
committer: Nicolai Hähnle <nicolai.haehnle@amd.com>
Mon, 4 Sep 2017 11:50:54 +0000 (13:50 +0200)
The result written by the shader workaround ends up in L2 and needs to be
written back, or the CP may read stale data.

Fixes: 78476cfe071a ("radeonsi: enable ARB_transform_feedback_overflow_query")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_query.c
src/gallium/drivers/radeonsi/si_pipe.c

index d76d4a13841ed8755c415cc78cac96ad8d6659e9..9805088bd8f1086b7973f87d776e648b4996d84f 100644 (file)
@@ -454,6 +454,11 @@ struct r600_common_screen {
                 */
                unsigned cp_to_L2;
 
+               /* Context flags to set so that all writes from earlier jobs
+                * that end in L2 are seen by CP.
+                */
+               unsigned L2_to_cp;
+
                /* Context flags to set so that all writes from earlier
                 * compute jobs are seen by L2 clients.
                 */
index eaff39c830dbe090246b6fe47675dcf3cab1ba4c..f937612bc1f67398ffc1079efa9d534fc4a7d5a8 100644 (file)
@@ -1833,6 +1833,10 @@ static void r600_render_condition(struct pipe_context *ctx,
                                ctx, query, true, PIPE_QUERY_TYPE_U64, 0,
                                &rquery->workaround_buf->b.b, rquery->workaround_offset);
 
+                       /* Setting this in the render cond atom is too late,
+                        * so set it here. */
+                       rctx->flags |= rctx->screen->barrier_flags.L2_to_cp;
+
                        atom->num_dw = 5;
 
                        rctx->render_cond_force_off = old_force_off;
index 18d5806ac8f6bcbf7ca2b2cbde8a2027af267f41..98d65329e3ec01ccdb9ef5d4a19c67ed54ddc13d 100644 (file)
@@ -1078,8 +1078,10 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 
        sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
                                            SI_CONTEXT_INV_VMEM_L1;
-       if (sscreen->b.chip_class <= VI)
+       if (sscreen->b.chip_class <= VI) {
                sscreen->b.barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
+               sscreen->b.barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
+       }
 
        sscreen->b.barrier_flags.compute_to_L2 = SI_CONTEXT_CS_PARTIAL_FLUSH;