radeonsi/gfx10: add si_context::emit_cache_flush
author Nicolai Hähnle <nicolai.haehnle@amd.com>
Thu, 16 Nov 2017 11:16:52 +0000 (12:16 +0100)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 3 Jul 2019 19:51:12 +0000 (15:51 -0400)
The introduction of GCR_CNTL makes cache flush handling on gfx10
sufficiently different that it makes sense to just use a separate
function.

Since emit_cache_flush is called quite early during context init,
we initialize the pointer explicitly in si_create_context.

Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/gallium/drivers/radeonsi/si_compute.c
src/gallium/drivers/radeonsi/si_compute_blit.c
src/gallium/drivers/radeonsi/si_cp_dma.c
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_gfx_cs.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_test_dma_perf.c

index 63c95ed26044b1f418f8a79d8e3518fbf296eb40..f482fed51d86b32e7d621a01f0cd91529536fe5c 100644 (file)
@@ -935,7 +935,7 @@ static void si_launch_grid(
        }
 
        if (sctx->flags)
-               si_emit_cache_flush(sctx);
+               sctx->emit_cache_flush(sctx);
 
        if (!si_switch_compute_shader(sctx, program, &program->shader,
                                        code_object, info->pc))
index 4c5464ac118c3ac79b66b42d70782c1f0671521e..29ead4cdfaad730b0da28eb5e9db6756f676ae97 100644 (file)
@@ -434,7 +434,7 @@ void si_retile_dcc(struct si_context *sctx, struct si_texture *tex)
                       SI_CONTEXT_CS_PARTIAL_FLUSH |
                       si_get_flush_flags(sctx, SI_COHERENCY_CB_META, L2_LRU) |
                       si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_LRU);
-       si_emit_cache_flush(sctx);
+       sctx->emit_cache_flush(sctx);
 
        /* Save states. */
        void *saved_cs = sctx->cs_shader_state.program;
index e83016fc53198bc0540c57afd805025ca025977a..e9ddfe6f49d2c84e1e6b7baca591725e8fcb39b3 100644 (file)
@@ -187,7 +187,7 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
         * Also wait for the previous CP DMA operations.
         */
        if (!(user_flags & SI_CPDMA_SKIP_GFX_SYNC) && sctx->flags)
-               si_emit_cache_flush(sctx);
+               sctx->emit_cache_flush(sctx);
 
        if (!(user_flags & SI_CPDMA_SKIP_SYNC_BEFORE) && *is_first &&
            !(*packet_flags & CP_DMA_CLEAR))
index d27cf9f28244ff698664ed6204577081f19e5ada..330943085d65c120c10d4af1babc72eea9a0b684 100644 (file)
@@ -1904,7 +1904,7 @@ static void si_upload_bindless_descriptors(struct si_context *sctx)
         */
        sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
                         SI_CONTEXT_CS_PARTIAL_FLUSH;
-       si_emit_cache_flush(sctx);
+       sctx->emit_cache_flush(sctx);
 
        util_dynarray_foreach(&sctx->resident_tex_handles,
                              struct si_texture_handle *, tex_handle) {
@@ -1930,7 +1930,7 @@ static void si_upload_bindless_descriptors(struct si_context *sctx)
 
        /* Invalidate L1 because it doesn't know that L2 changed. */
        sctx->flags |= SI_CONTEXT_INV_SCACHE;
-       si_emit_cache_flush(sctx);
+       sctx->emit_cache_flush(sctx);
 
        sctx->bindless_descriptors_dirty = false;
 }
index 9386df3a6158d113b9c2825e1c729d343a2b16ec..fb9286d6b48cdb9be946e07d40e68291e4afcb19 100644 (file)
@@ -173,7 +173,7 @@ void si_flush_gfx_cs(struct si_context *ctx, unsigned flags,
        /* Wait for draw calls to finish if needed. */
        if (wait_flags) {
                ctx->flags |= wait_flags;
-               si_emit_cache_flush(ctx);
+               ctx->emit_cache_flush(ctx);
        }
        ctx->gfx_last_ib_is_busy = wait_flags == 0;
 
index c93f2b96471b9d888c47c0797648d0edfe11a3e3..65a027fe928b83d9ba4d0adcfda0f342ffd7eab8 100644 (file)
@@ -487,6 +487,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
                goto fail;
 
        /* Initialize context functions used by graphics and compute. */
+       sctx->emit_cache_flush = si_emit_cache_flush;
        sctx->b.emit_string_marker = si_emit_string_marker;
        sctx->b.set_debug_callback = si_set_debug_callback;
        sctx->b.set_log_context = si_set_log_context;
index 6ad62e15df1e247578a881bb6611dcbb5518ae1a..162adf05b6086d0dd04e962078837207975212e4 100644 (file)
@@ -862,6 +862,9 @@ struct si_context {
        struct pipe_device_reset_callback device_reset_callback;
        struct u_log_context            *log;
        void                            *query_result_shader;
+
+       void (*emit_cache_flush)(struct si_context *ctx);
+
        struct blitter_context          *blitter;
        void                            *custom_dsa_flush;
        void                            *custom_blend_resolve;
index 087ddacee729757a2866b2f6eeca433537f9f83d..a2e7fa936594708c6acd2b15e70f71a2f8f5993d 100644 (file)
@@ -981,6 +981,8 @@ void si_emit_cache_flush(struct si_context *sctx)
                                (flags & SI_CONTEXT_CS_PARTIAL_FLUSH &&
                                 sctx->compute_is_busy);
 
+       assert(sctx->chip_class <= GFX9);
+
        if (flags & SI_CONTEXT_FLUSH_AND_INV_CB)
                sctx->num_cb_cache_flushes++;
        if (flags & SI_CONTEXT_FLUSH_AND_INV_DB)
@@ -1744,7 +1746,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
                /* Emit all states except possibly render condition. */
                si_emit_all_states(sctx, info, prim, instance_count,
                                   primitive_restart, masked_atoms);
-               si_emit_cache_flush(sctx);
+               sctx->emit_cache_flush(sctx);
                /* <-- CUs are idle here. */
 
                if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond))
@@ -1772,7 +1774,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
                 * states, and draw at the end.
                 */
                if (sctx->flags)
-                       si_emit_cache_flush(sctx);
+                       sctx->emit_cache_flush(sctx);
 
                /* Only prefetch the API VS and VBO descriptors. */
                if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask)
index 0a0b9c4a657e85801ceace21c2d4d3fdfded1435..17454b88b49f64337e7013354a12f180049928a6 100644 (file)
@@ -253,7 +253,7 @@ void si_test_dma_perf(struct si_screen *sscreen)
                                        /* Flush L2, so that we don't just test L2 cache performance. */
                                        if (!test_sdma) {
                                                sctx->flags |= SI_CONTEXT_WB_L2;
-                                               si_emit_cache_flush(sctx);
+                                               sctx->emit_cache_flush(sctx);
                                        }
 
                                        ctx->end_query(ctx, q[iter]);