From 604b58b554f00ab9eb06eff47bfd4f859424c0ae Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 20 Sep 2014 11:48:58 +0200 Subject: [PATCH] radeonsi/compute: flush caches with si_emit_cache_flush Reviewed-by: Alex Deucher Reviewed-by: Tom Stellard --- src/gallium/drivers/radeon/r600_pipe_common.h | 3 +++ src/gallium/drivers/radeonsi/si_compute.c | 27 +++++++++---------- src/gallium/drivers/radeonsi/si_state_draw.c | 25 ++++++++++------- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index beaa312b48d..9a5a100fa6d 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -68,12 +68,15 @@ #define R600_CONTEXT_FLUSH_AND_INV_DB_META (1 << 11) #define R600_CONTEXT_FLUSH_AND_INV_DB (1 << 12) #define R600_CONTEXT_FLUSH_AND_INV_CB (1 << 13) +#define R600_CONTEXT_FLUSH_WITH_INV_L2 (1 << 14) /* engine synchronization */ #define R600_CONTEXT_PS_PARTIAL_FLUSH (1 << 16) #define R600_CONTEXT_WAIT_3D_IDLE (1 << 17) #define R600_CONTEXT_WAIT_CP_DMA_IDLE (1 << 18) #define R600_CONTEXT_VGT_FLUSH (1 << 19) #define R600_CONTEXT_VGT_STREAMOUT_SYNC (1 << 20) +/* other flags */ +#define R600_CONTEXT_FLAG_COMPUTE (1u << 31) /* special primitive types */ #define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 3ad9182d5fe..e24c6e22bb4 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -189,17 +189,14 @@ static void si_launch_grid( radeon_emit(cs, 0x80000000); radeon_emit(cs, 0x80000000); - pm4->compute_pkt = true; - - si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE); - si_pm4_cmd_add(pm4, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) | - EVENT_INDEX(0x7) | - EVENT_WRITE_INV_L2); - si_pm4_cmd_end(pm4, false); + sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | + R600_CONTEXT_INV_SHADER_CACHE | + R600_CONTEXT_INV_CONST_CACHE | + R600_CONTEXT_FLUSH_WITH_INV_L2 | + R600_CONTEXT_FLAG_COMPUTE; + si_emit_cache_flush(&sctx->b, NULL); - si_pm4_inval_texture_cache(pm4); - si_pm4_inval_shader_cache(pm4); - si_cmd_surface_sync(pm4, pm4->cp_coher_cntl); + pm4->compute_pkt = true; /* Upload the kernel arguments */ @@ -368,10 +365,6 @@ static void si_launch_grid( si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(0x4))); si_pm4_cmd_end(pm4, false); - si_pm4_inval_texture_cache(pm4); - si_pm4_inval_shader_cache(pm4); - si_cmd_surface_sync(pm4, pm4->cp_coher_cntl); - si_pm4_emit(sctx, pm4); #if 0 @@ -382,6 +375,12 @@ static void si_launch_grid( #endif si_pm4_free_state(sctx, pm4, ~0); + + sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | + R600_CONTEXT_INV_SHADER_CACHE | + R600_CONTEXT_INV_CONST_CACHE | + R600_CONTEXT_FLAG_COMPUTE; + si_emit_cache_flush(&sctx->b, NULL); } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 61951eeec9e..a4b70177ea9 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -788,6 +788,8 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato { struct radeon_winsys_cs *cs = sctx->rings.gfx.cs; uint32_t cp_coher_cntl = 0; + uint32_t compute = + PKT3_SHADER_TYPE_S(!!(sctx->flags & R600_CONTEXT_FLAG_COMPUTE)); /* XXX SI flushes both ICACHE and KCACHE if either flag is set. * XXX CIK shouldn't have this issue. Test CIK before separating the flags @@ -821,7 +823,7 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato if (cp_coher_cntl) { if (sctx->chip_class >= CIK) { - radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0)); + radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) | compute); radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */ @@ -829,7 +831,7 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato radeon_emit(cs, 0); /* CP_COHER_BASE_HI */ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ } else { - radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0)); + radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0) | compute); radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ radeon_emit(cs, 0); /* CP_COHER_BASE */ @@ -838,37 +840,42 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato } if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_CB_META) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute); radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); } if (sctx->flags & R600_CONTEXT_FLUSH_AND_INV_DB_META) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute); radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0)); } + if (sctx->flags & R600_CONTEXT_FLUSH_WITH_INV_L2) { + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute); + radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH) | EVENT_INDEX(7) | + EVENT_WRITE_INV_L2); + } if (sctx->flags & (R600_CONTEXT_WAIT_3D_IDLE | R600_CONTEXT_PS_PARTIAL_FLUSH)) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute); radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); } else if (sctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) { /* Needed if streamout buffers are going to be used as a source. */ - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute); radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); } if (sctx->flags & R600_CONTEXT_VGT_FLUSH) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute); radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); } if (sctx->flags & R600_CONTEXT_VGT_STREAMOUT_SYNC) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute); radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0)); } sctx->flags = 0; } -const struct r600_atom si_atom_cache_flush = { si_emit_cache_flush, 17 }; /* number of CS dwords */ +const struct r600_atom si_atom_cache_flush = { si_emit_cache_flush, 19 }; /* number of CS dwords */ static void si_get_draw_start_count(struct si_context *sctx, const struct pipe_draw_info *info, -- 2.30.2