radeonsi: invalidate caches at the beginning of the prim discard compute IB
author Marek Olšák <marek.olsak@amd.com>
Tue, 26 Feb 2019 03:53:37 +0000 (22:53 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 16 May 2019 17:13:36 +0000 (13:13 -0400)
Acked-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeonsi/si_compute_prim_discard.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_draw.c

index 8261311f74a26724eb11b3b7593f1bc8086ff91d..362c63c2e44ba45328188a91d8345d7a81aa4fe2 100644 (file)
@@ -1196,6 +1196,17 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,
                }
 
                /* 2) IB initialization. */
+
+               /* This needs to be done at the beginning of IBs due to possible
+                * TTM buffer moves in the kernel.
+                */
+               si_emit_surface_sync(sctx, cs,
+                                    S_0085F0_TC_ACTION_ENA(1) |
+                                    S_0085F0_TCL1_ACTION_ENA(1) |
+                                    S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8) |
+                                    S_0085F0_SH_ICACHE_ACTION_ENA(1) |
+                                    S_0085F0_SH_KCACHE_ACTION_ENA(1));
+
                /* Restore the GDS prim restart counter if needed. */
                if (sctx->preserve_prim_restart_gds_at_flush) {
                        si_cp_copy_data(sctx, cs,
index 05e974d4c12881c5e8d87a61d4ebdc7cc4e5361d..66a2024144673ea8f8a1c4a595b6b9eaa54198eb 100644 (file)
@@ -604,6 +604,8 @@ void si_shader_selector_key_vs(struct si_context *sctx,
                               struct si_vs_prolog_bits *prolog_key);
 
 /* si_state_draw.c */
+void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
+                         unsigned cp_coher_cntl);
 void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx);
 void si_emit_cache_flush(struct si_context *sctx);
 void si_trace_emit(struct si_context *sctx);
index 2c571016ada54e11f35286fbf5d88311b58ca0b8..d7de37b33ff260e0bdb666343d65c05b518cf606 100644 (file)
@@ -889,12 +889,13 @@ static void si_emit_draw_packets(struct si_context *sctx,
        }
 }
 
-static void si_emit_surface_sync(struct si_context *sctx,
-                                unsigned cp_coher_cntl)
+void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,
+                         unsigned cp_coher_cntl)
 {
-       struct radeon_cmdbuf *cs = sctx->gfx_cs;
+       bool compute_ib = !sctx->has_graphics ||
+                         cs == sctx->prim_discard_compute_cs;
 
-       if (sctx->chip_class >= GFX9 || !sctx->has_graphics) {
+       if (sctx->chip_class >= GFX9 || compute_ib) {
                /* Flush caches and wait for the caches to assert idle. */
                radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
                radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
@@ -914,7 +915,7 @@ static void si_emit_surface_sync(struct si_context *sctx,
 
        /* ACQUIRE_MEM has an implicit context roll if the current context
         * is busy. */
-       if (sctx->has_graphics)
+       if (!compute_ib)
                sctx->context_roll = true;
 }
 
@@ -1162,7 +1163,7 @@ void si_emit_cache_flush(struct si_context *sctx)
                /* Invalidate L1 & L2. (L1 is always invalidated on GFX6)
                 * WB must be set on GFX8+ when TC_ACTION is set.
                 */
-               si_emit_surface_sync(sctx, cp_coher_cntl |
+               si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
                                     S_0085F0_TC_ACTION_ENA(1) |
                                     S_0085F0_TCL1_ACTION_ENA(1) |
                                     S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8));
@@ -1179,7 +1180,7 @@ void si_emit_cache_flush(struct si_context *sctx)
                         *
                         * WB doesn't work without NC.
                         */
-                       si_emit_surface_sync(sctx, cp_coher_cntl |
+                       si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
                                             S_0301F0_TC_WB_ACTION_ENA(1) |
                                             S_0301F0_TC_NC_ACTION_ENA(1));
                        cp_coher_cntl = 0;
@@ -1187,7 +1188,7 @@ void si_emit_cache_flush(struct si_context *sctx)
                }
                if (flags & SI_CONTEXT_INV_VMEM_L1) {
                        /* Invalidate per-CU VMEM L1. */
-                       si_emit_surface_sync(sctx, cp_coher_cntl |
+                       si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl |
                                             S_0085F0_TCL1_ACTION_ENA(1));
                        cp_coher_cntl = 0;
                }
@@ -1195,7 +1196,7 @@ void si_emit_cache_flush(struct si_context *sctx)
 
        /* If TC flushes haven't cleared this... */
        if (cp_coher_cntl)
-               si_emit_surface_sync(sctx, cp_coher_cntl);
+               si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl);
 
        if (is_barrier)
                si_prim_discard_signal_next_compute_ib_start(sctx);