From 04122532e3c06260ae889a4f6a28d6f9849b00f5 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 25 Feb 2019 22:53:37 -0500 Subject: [PATCH] radeonsi: invalidate caches at the beginning of the prim discard compute IB MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Acked-by: Nicolai Hähnle --- .../radeonsi/si_compute_prim_discard.c | 11 +++++++++++ src/gallium/drivers/radeonsi/si_state.h | 2 ++ src/gallium/drivers/radeonsi/si_state_draw.c | 19 ++++++++++--------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c index 8261311f74a..362c63c2e44 100644 --- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c +++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c @@ -1196,6 +1196,17 @@ void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx, } /* 2) IB initialization. */ + + /* This needs to be done at the beginning of IBs due to possible + * TTM buffer moves in the kernel. + */ + si_emit_surface_sync(sctx, cs, + S_0085F0_TC_ACTION_ENA(1) | + S_0085F0_TCL1_ACTION_ENA(1) | + S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8) | + S_0085F0_SH_ICACHE_ACTION_ENA(1) | + S_0085F0_SH_KCACHE_ACTION_ENA(1)); + /* Restore the GDS prim restart counter if needed. */ if (sctx->preserve_prim_restart_gds_at_flush) { si_cp_copy_data(sctx, cs, diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 05e974d4c12..66a20241446 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -604,6 +604,8 @@ void si_shader_selector_key_vs(struct si_context *sctx, struct si_vs_prolog_bits *prolog_key); /* si_state_draw.c */ +void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, + unsigned cp_coher_cntl); void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx); void si_emit_cache_flush(struct si_context *sctx); void si_trace_emit(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 2c571016ada..d7de37b33ff 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -889,12 +889,13 @@ static void si_emit_draw_packets(struct si_context *sctx, } } -static void si_emit_surface_sync(struct si_context *sctx, - unsigned cp_coher_cntl) +void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs, + unsigned cp_coher_cntl) { - struct radeon_cmdbuf *cs = sctx->gfx_cs; + bool compute_ib = !sctx->has_graphics || + cs == sctx->prim_discard_compute_cs; - if (sctx->chip_class >= GFX9 || !sctx->has_graphics) { + if (sctx->chip_class >= GFX9 || compute_ib) { /* Flush caches and wait for the caches to assert idle. */ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0)); radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ @@ -914,7 +915,7 @@ static void si_emit_surface_sync(struct si_context *sctx, /* ACQUIRE_MEM has an implicit context roll if the current context * is busy. */ - if (sctx->has_graphics) + if (!compute_ib) sctx->context_roll = true; } @@ -1162,7 +1163,7 @@ void si_emit_cache_flush(struct si_context *sctx) /* Invalidate L1 & L2. (L1 is always invalidated on GFX6) * WB must be set on GFX8+ when TC_ACTION is set. */ - si_emit_surface_sync(sctx, cp_coher_cntl | + si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) | S_0301F0_TC_WB_ACTION_ENA(sctx->chip_class >= GFX8)); @@ -1179,7 +1180,7 @@ void si_emit_cache_flush(struct si_context *sctx) * * WB doesn't work without NC. */ - si_emit_surface_sync(sctx, cp_coher_cntl | + si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1)); cp_coher_cntl = 0; @@ -1187,7 +1188,7 @@ void si_emit_cache_flush(struct si_context *sctx) } if (flags & SI_CONTEXT_INV_VMEM_L1) { /* Invalidate per-CU VMEM L1. */ - si_emit_surface_sync(sctx, cp_coher_cntl | + si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1)); cp_coher_cntl = 0; } @@ -1195,7 +1196,7 @@ void si_emit_cache_flush(struct si_context *sctx) /* If TC flushes haven't cleared this... */ if (cp_coher_cntl) - si_emit_surface_sync(sctx, cp_coher_cntl); + si_emit_surface_sync(sctx, sctx->gfx_cs, cp_coher_cntl); if (is_barrier) si_prim_discard_signal_next_compute_ib_start(sctx); -- 2.30.2