From c6012a6650c894e57dba51f8e336f134aad13d61 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 6 Nov 2015 21:11:16 +0100 Subject: [PATCH] radeonsi: rename cache flushing flags once more MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit KCACHE, TC L1 and TC L2 are renamed to: - SMEM L1 - VMEM L1 - GLOBAL L2 You can easily tell what they are used for now. Shaders must deal with coherency issues between both L1s manually, e.g. by setting GLC=1 or by using s_dcache_*. BOTH_ICACHE_KCACHE was an unused definition. Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeonsi/si_compute.c | 12 ++++++------ src/gallium/drivers/radeonsi/si_cp_dma.c | 6 +++--- src/gallium/drivers/radeonsi/si_descriptors.c | 4 ++-- src/gallium/drivers/radeonsi/si_hw_context.c | 10 +++++----- src/gallium/drivers/radeonsi/si_pipe.h | 15 ++++++--------- src/gallium/drivers/radeonsi/si_state.c | 8 ++++---- src/gallium/drivers/radeonsi/si_state_draw.c | 10 ++++------ 7 files changed, 30 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 697e60a50d9..c008f8b402b 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -253,10 +253,10 @@ static void si_launch_grid( radeon_emit(cs, 0x80000000); radeon_emit(cs, 0x80000000); - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_INV_ICACHE | - SI_CONTEXT_INV_KCACHE | + SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_FLUSH_WITH_INV_L2 | SI_CONTEXT_FLAG_COMPUTE; si_emit_cache_flush(sctx, NULL); @@ -449,10 +449,10 @@ static void si_launch_grid( si_pm4_free_state(sctx, pm4, ~0); sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | - SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | + SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_INV_ICACHE | - SI_CONTEXT_INV_KCACHE | + SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_FLAG_COMPUTE; si_emit_cache_flush(sctx, NULL); } diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 3f657ff96ed..ac35a54ce65 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -112,9 +112,9 @@ static unsigned get_flush_flags(struct si_context *sctx, bool is_framebuffer) if (is_framebuffer) return SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; - return SI_CONTEXT_INV_TC_L1 | - (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) | - SI_CONTEXT_INV_KCACHE; + return SI_CONTEXT_INV_SMEM_L1 | + SI_CONTEXT_INV_VMEM_L1 | + (sctx->b.chip_class == SI ? SI_CONTEXT_INV_GLOBAL_L2 : 0); } static unsigned get_tc_l2_flag(struct si_context *sctx, bool is_framebuffer) diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index a8ff6f27319..b4dc3cb2347 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -670,8 +670,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx, * VS_PARTIAL_FLUSH is required if the buffers are going to be * used as an input immediately. */ - sctx->b.flags |= SI_CONTEXT_INV_KCACHE | - SI_CONTEXT_INV_TC_L1 | + sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 | + SI_CONTEXT_INV_VMEM_L1 | SI_CONTEXT_VS_PARTIAL_FLUSH; } diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 7c147e2e44c..9b8bdf5d901 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -73,8 +73,8 @@ void si_context_gfx_flush(void *context, unsigned flags, r600_preflush_suspend_features(&ctx->b); ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | - SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | + SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | /* this is probably not needed anymore */ SI_CONTEXT_PS_PARTIAL_FLUSH; si_emit_cache_flush(ctx, NULL); @@ -144,9 +144,9 @@ void si_begin_new_cs(struct si_context *ctx) /* Flush read caches at the beginning of CS. */ ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER | - SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | - SI_CONTEXT_INV_KCACHE | + SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | + SI_CONTEXT_INV_SMEM_L1 | SI_CONTEXT_INV_ICACHE; /* set all valid group as dirty so they get reemited on diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 42cd8803c36..20fd6952d8c 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -46,15 +46,12 @@ /* Instruction cache. */ #define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0) -/* Cache used by scalar memory (SMEM) instructions. They also use TC - * as a second level cache, which isn't flushed by this. - * Other names: constant cache, data cache, DCACHE */ -#define SI_CONTEXT_INV_KCACHE (R600_CONTEXT_PRIVATE_FLAG << 1) -/* Caches used by vector memory (VMEM) instructions. - * L1 can optionally be bypassed (GLC=1) and can only be used by shaders. - * L2 is used by shaders and can be used by other blocks (CP, sDMA). */ -#define SI_CONTEXT_INV_TC_L1 (R600_CONTEXT_PRIVATE_FLAG << 2) -#define SI_CONTEXT_INV_TC_L2 (R600_CONTEXT_PRIVATE_FLAG << 3) +/* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */ +#define SI_CONTEXT_INV_SMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 1) +/* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */ +#define SI_CONTEXT_INV_VMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 2) +/* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ +#define SI_CONTEXT_INV_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 3) /* Framebuffer caches. */ #define SI_CONTEXT_FLUSH_AND_INV_CB_META (R600_CONTEXT_PRIVATE_FLAG << 4) #define SI_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 5) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 6d97049c0f3..2c35c0a8e0a 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2125,8 +2125,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, * Flush all CB and DB caches here because all buffers can be used * for write by both TC (with shader image stores) and CB/DB. */ - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; /* Take the maximum of the old and new count. If the new count is lower, @@ -3044,8 +3044,8 @@ static void si_texture_barrier(struct pipe_context *ctx) { struct si_context *sctx = (struct si_context *)ctx; - sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | + sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 | + SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_FLUSH_AND_INV_CB; } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index cf0891a2ab7..6c7fff9735c 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -604,8 +604,6 @@ static void si_emit_draw_packets(struct si_context *sctx, } } -#define BOTH_ICACHE_KCACHE (SI_CONTEXT_INV_ICACHE | SI_CONTEXT_INV_KCACHE) - void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom) { struct r600_common_context *sctx = &si_ctx->b; @@ -624,12 +622,12 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom) if (sctx->flags & SI_CONTEXT_INV_ICACHE) cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); - if (sctx->flags & SI_CONTEXT_INV_KCACHE) + if (sctx->flags & SI_CONTEXT_INV_SMEM_L1) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); - if (sctx->flags & SI_CONTEXT_INV_TC_L1) + if (sctx->flags & SI_CONTEXT_INV_VMEM_L1) cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); - if (sctx->flags & SI_CONTEXT_INV_TC_L2) { + if (sctx->flags & SI_CONTEXT_INV_GLOBAL_L2) { cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1); /* TODO: this might not be needed. */ @@ -843,7 +841,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) /* VI reads index buffers through TC L2. */ if (info->indexed && sctx->b.chip_class <= CIK && r600_resource(ib.buffer)->TC_L2_dirty) { - sctx->b.flags |= SI_CONTEXT_INV_TC_L2; + sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2; r600_resource(ib.buffer)->TC_L2_dirty = false; } -- 2.30.2