radeonsi: ensure cache flushes happen before SET_PREDICATION packets
author     Nicolai Hähnle <nicolai.haehnle@amd.com>
           Fri, 25 Aug 2017 07:04:40 +0000 (09:04 +0200)
committer  Nicolai Hähnle <nicolai.haehnle@amd.com>
           Mon, 4 Sep 2017 11:50:57 +0000 (13:50 +0200)
The data is read when the render_cond_atom is emitted, so we must
delay emitting the atom until after the flush.

Fixes: 0fe0320dc074 ("radeonsi: use optimal packet order when doing a pipeline sync")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
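
The patch relies on the driver's existing atom bitmask to defer one atom past the
cache flush. Below is a minimal, self-contained sketch of that "mask now, emit
later" idiom; the names (struct atom, emit_all_states, the skip mask) are
illustrative stand-ins, not the actual Mesa/radeonsi API.

/* Minimal standalone sketch (illustrative names only, not the radeonsi API)
 * of the "mask now, emit later" idiom: atoms whose packets read GPU memory,
 * like the render condition's SET_PREDICATION, are skipped during the bulk
 * state emit and re-emitted only after the cache flush has been recorded. */
#include <stdint.h>
#include <stdio.h>

struct atom {
	unsigned id;
	const char *name;
	void (*emit)(const struct atom *a);
};

static void emit_atom(const struct atom *a)
{
	printf("emit %s\n", a->name);
}

/* Emit every dirty atom except the ones named in skip_mask. */
static void emit_all_states(uint64_t *dirty, const struct atom *atoms,
			    unsigned count, uint64_t skip_mask)
{
	for (unsigned i = 0; i < count; i++) {
		uint64_t bit = 1ull << atoms[i].id;
		if ((*dirty & bit) && !(skip_mask & bit)) {
			atoms[i].emit(&atoms[i]);
			*dirty &= ~bit;
		}
	}
}

int main(void)
{
	struct atom atoms[] = {
		{ 0, "framebuffer",     emit_atom },
		{ 1, "shader_pointers", emit_atom },
		{ 2, "render_cond",     emit_atom }, /* reads query results from memory */
	};
	uint64_t dirty = (1ull << 0) | (1ull << 1) | (1ull << 2);

	/* Bulk emit with the memory-reading atoms masked out. */
	uint64_t skip = (1ull << 1) | (1ull << 2);
	emit_all_states(&dirty, atoms, 3, skip);

	printf("emit cache flush\n"); /* query data becomes visible to the CP here */

	/* Now it is safe to emit the deferred atoms. */
	for (unsigned i = 0; i < 3; i++) {
		if (dirty & (1ull << atoms[i].id))
			atoms[i].emit(&atoms[i]);
	}
	dirty = 0;

	return 0;
}

In the actual patch, R600_CONTEXT_FLUSH_FOR_RENDER_COND plays the role of the
skip decision: r600_render_condition() sets it when the predication workaround
needs a flush, and si_draw_vbo() masks the render condition atom out of
si_emit_all_states() and emits it only after si_emit_cache_flush().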
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_query.c
src/gallium/drivers/radeonsi/si_state_draw.c

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 9805088bd8f1086b7973f87d776e648b4996d84f..8c70e91ae0326001f94895f32b691acaa117ecba 100644
@@ -61,7 +61,8 @@ struct u_log_context;
 /* Pipeline & streamout query controls. */
 #define R600_CONTEXT_START_PIPELINE_STATS      (1u << 1)
 #define R600_CONTEXT_STOP_PIPELINE_STATS       (1u << 2)
-#define R600_CONTEXT_PRIVATE_FLAG              (1u << 3)
+#define R600_CONTEXT_FLUSH_FOR_RENDER_COND     (1u << 3)
+#define R600_CONTEXT_PRIVATE_FLAG              (1u << 4)
 
 /* special primitive types */
 #define R600_PRIM_RECTANGLE_LIST       PIPE_PRIM_MAX
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index f937612bc1f67398ffc1079efa9d534fc4a7d5a8..03ff1018a7146e69fb73c1120192810ef7ead72e 100644
@@ -1835,11 +1835,14 @@ static void r600_render_condition(struct pipe_context *ctx,
 
                        /* Setting this in the render cond atom is too late,
                         * so set it here. */
-                       rctx->flags |= rctx->screen->barrier_flags.L2_to_cp;
-
-                       atom->num_dw = 5;
+                       rctx->flags |= rctx->screen->barrier_flags.L2_to_cp |
+                                      R600_CONTEXT_FLUSH_FOR_RENDER_COND;
 
                        rctx->render_cond_force_off = old_force_off;
+               }
+
+               if (needs_workaround) {
+                       atom->num_dw = 5;
                } else {
                        for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
                                atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 1d8be49a480752fbf9d6702854ba0d873a3ab1f9..81751d2186ec381c2cd5488b10a504595ec3cd45 100644
@@ -1392,9 +1392,13 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                 * the wait and the draw)
                 */
                struct r600_atom *shader_pointers = &sctx->shader_pointers.atom;
+               unsigned masked_atoms = 1u << shader_pointers->id;
 
-               /* Emit all states except shader pointers. */
-               si_emit_all_states(sctx, info, 1 << shader_pointers->id);
+               if (unlikely(sctx->b.flags & R600_CONTEXT_FLUSH_FOR_RENDER_COND))
+                       masked_atoms |= 1u << sctx->b.render_cond_atom.id;
+
+               /* Emit all states except shader pointers and render condition. */
+               si_emit_all_states(sctx, info, masked_atoms);
                si_emit_cache_flush(sctx);
 
                /* <-- CUs are idle here. */
@@ -1402,10 +1406,11 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                        return;
 
                /* Set shader pointers after descriptors are uploaded. */
-               if (si_is_atom_dirty(sctx, shader_pointers)) {
+               if (si_is_atom_dirty(sctx, shader_pointers))
                        shader_pointers->emit(&sctx->b, NULL);
-                       sctx->dirty_atoms = 0;
-               }
+               if (si_is_atom_dirty(sctx, &sctx->b.render_cond_atom))
+                       sctx->b.render_cond_atom.emit(&sctx->b, NULL);
+               sctx->dirty_atoms = 0;
 
                si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
                /* <-- CUs are busy here. */