From 8ea7ee153649ac07c8418cc0d4aa5a4e123d19d1 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 25 Jun 2019 17:57:45 +0200 Subject: [PATCH] radv: rename and re-document cache flush flags SMEM and VMEM caches are L0 on gfx10. Ported from RadeonSI. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/radv_cmd_buffer.c | 16 ++++----- src/amd/vulkan/radv_device.c | 12 +++---- src/amd/vulkan/radv_meta_buffer.c | 4 +-- src/amd/vulkan/radv_meta_clear.c | 4 +-- src/amd/vulkan/radv_meta_fast_clear.c | 2 +- src/amd/vulkan/radv_meta_fmask_expand.c | 2 +- src/amd/vulkan/radv_meta_resolve_cs.c | 4 +-- src/amd/vulkan/radv_private.h | 44 ++++++++++++++----------- src/amd/vulkan/radv_query.c | 8 ++--- src/amd/vulkan/si_cmd_buffer.c | 24 +++++++------- 10 files changed, 62 insertions(+), 58 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 29f2e0c8a60..8ffd3989634 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2576,7 +2576,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, case VK_ACCESS_SHADER_WRITE_BIT: case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: - flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; + flush_bits |= RADV_CMD_FLAG_WB_L2; break; case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; @@ -2591,7 +2591,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, case VK_ACCESS_TRANSFER_WRITE_BIT: flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB | - RADV_CMD_FLAG_INV_GLOBAL_L2; + RADV_CMD_FLAG_INV_L2; if (flush_CB_meta) flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; @@ -2648,19 +2648,19 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: break; case VK_ACCESS_UNIFORM_READ_BIT: - flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1; + flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE; break; case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: case VK_ACCESS_TRANSFER_READ_BIT: case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: - flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2; + flush_bits |= RADV_CMD_FLAG_INV_VCACHE | + RADV_CMD_FLAG_INV_L2; break; case VK_ACCESS_SHADER_READ_BIT: - flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1; + flush_bits |= RADV_CMD_FLAG_INV_VCACHE; if (!image_is_coherent) - flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2; + flush_bits |= RADV_CMD_FLAG_INV_L2; break; case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: if (flush_CB) @@ -3355,7 +3355,7 @@ VkResult radv_EndCommandBuffer( if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) { if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6) - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2; /* Make sure to sync all pending active queries at the end of * command buffer. diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index f12b8bde1f9..8d4964073cf 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -2704,9 +2704,9 @@ radv_get_preamble_cs(struct radv_queue *queue, queue->device->physical_device->rad_info.chip_class >= GFX7, (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2 | + RADV_CMD_FLAG_INV_SCACHE | + RADV_CMD_FLAG_INV_VCACHE | + RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_START_PIPELINE_STATS, 0); } else if (i == 1) { si_cs_emit_cache_flush(cs, @@ -2715,9 +2715,9 @@ radv_get_preamble_cs(struct radv_queue *queue, queue->queue_family_index == RING_COMPUTE && queue->device->physical_device->rad_info.chip_class >= GFX7, RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2 | + RADV_CMD_FLAG_INV_SCACHE | + RADV_CMD_FLAG_INV_VCACHE | + RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_START_PIPELINE_STATS, 0); } diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c index c19bf0da1c3..c457ac4e5f2 100644 --- a/src/amd/vulkan/radv_meta_buffer.c +++ b/src/amd/vulkan/radv_meta_buffer.c @@ -415,8 +415,8 @@ uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, if (size >= RADV_BUFFER_OPS_CS_THRESHOLD) { fill_buffer_shader(cmd_buffer, bo, offset, size, value); flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; + RADV_CMD_FLAG_INV_VCACHE | + RADV_CMD_FLAG_WB_L2; } else if (size) { uint64_t va = radv_buffer_get_va(bo); va += offset; diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c index 4d569729dda..091b73841f8 100644 --- a/src/amd/vulkan/radv_meta_clear.c +++ b/src/amd/vulkan/radv_meta_clear.c @@ -870,8 +870,8 @@ clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, radv_meta_restore(&saved_state, cmd_buffer); return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2; + RADV_CMD_FLAG_INV_VCACHE | + RADV_CMD_FLAG_WB_L2; } static uint32_t diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c index 71cf90c611e..f18f7637593 100644 --- a/src/amd/vulkan/radv_meta_fast_clear.c +++ b/src/amd/vulkan/radv_meta_fast_clear.c @@ -873,7 +873,7 @@ radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, radv_meta_restore(&saved_state, cmd_buffer); state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1; + RADV_CMD_FLAG_INV_VCACHE; /* Initialize the DCC metadata as "fully expanded". */ diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c index a8f5e0cc4c1..c4cec58235f 100644 --- a/src/amd/vulkan/radv_meta_fmask_expand.c +++ b/src/amd/vulkan/radv_meta_fmask_expand.c @@ -169,7 +169,7 @@ radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, radv_meta_restore(&saved_state, cmd_buffer); cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_GLOBAL_L2; + RADV_CMD_FLAG_INV_L2; /* Re-initialize FMASK in fully expanded mode. */ radv_initialize_fmask(cmd_buffer, image, subresourceRange); diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c index c06f0f2c5ce..7d3cc166e0d 100644 --- a/src/amd/vulkan/radv_meta_resolve_cs.c +++ b/src/amd/vulkan/radv_meta_resolve_cs.c @@ -952,7 +952,7 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer) } cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1; + RADV_CMD_FLAG_INV_VCACHE; } void @@ -1037,7 +1037,7 @@ radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer, } cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1; + RADV_CMD_FLAG_INV_VCACHE; if (radv_image_has_htile(dst_image)) { if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index b537778001c..0c842a4d1b7 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -914,29 +914,33 @@ enum radv_cmd_dirty_bits { }; enum radv_cmd_flush_bits { - RADV_CMD_FLAG_INV_ICACHE = 1 << 0, - /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */ - RADV_CMD_FLAG_INV_SMEM_L1 = 1 << 1, - /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */ - RADV_CMD_FLAG_INV_VMEM_L1 = 1 << 2, - /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC L2 */ - RADV_CMD_FLAG_INV_GLOBAL_L2 = 1 << 3, - /* Same as above, but only writes back and doesn't invalidate */ - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 = 1 << 4, + /* Instruction cache. */ + RADV_CMD_FLAG_INV_ICACHE = 1 << 0, + /* Scalar L1 cache. */ + RADV_CMD_FLAG_INV_SCACHE = 1 << 1, + /* Vector L1 cache. */ + RADV_CMD_FLAG_INV_VCACHE = 1 << 2, + /* L2 cache + L2 metadata cache writeback & invalidate. + * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */ + RADV_CMD_FLAG_INV_L2 = 1 << 3, + /* L2 writeback (write dirty L2 lines to memory for non-L2 clients). + * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8. + * GFX6-7 will do complete invalidation, because the writeback is unsupported. */ + RADV_CMD_FLAG_WB_L2 = 1 << 4, /* Framebuffer caches */ - RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5, - RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6, - RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7, - RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8, + RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 5, + RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 6, + RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 7, + RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 8, /* Engine synchronization. */ - RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9, - RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10, - RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11, - RADV_CMD_FLAG_VGT_FLUSH = 1 << 12, + RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 9, + RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 10, + RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 11, + RADV_CMD_FLAG_VGT_FLUSH = 1 << 12, /* Pipeline query controls. */ - RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13, - RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14, - RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 15, + RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 13, + RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 14, + RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 15, RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META | diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index bec7b23af05..82741c21bf7 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1012,8 +1012,8 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants); - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 | - RADV_CMD_FLAG_INV_VMEM_L1; + cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2 | + RADV_CMD_FLAG_INV_VCACHE; if (flags & VK_QUERY_RESULT_WAIT_BIT) cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER; @@ -1639,8 +1639,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_GLOBAL_L2 | - RADV_CMD_FLAG_INV_VMEM_L1; + RADV_CMD_FLAG_INV_L2 | + RADV_CMD_FLAG_INV_VCACHE; if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) { cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 126cabd390a..52cb7477c08 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -781,7 +781,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); - if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1) + if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); if (chip_class <= GFX8) { @@ -859,16 +859,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, EVENT_TC_MD_ACTION_ENA; /* Ideally flush TC together with CB/DB. */ - if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) { + if (flush_bits & RADV_CMD_FLAG_INV_L2) { /* Writeback and invalidate everything in L2 & L1. */ tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_WB_ACTION_ENA; /* Clear the flags. */ - flush_bits &= ~(RADV_CMD_FLAG_INV_GLOBAL_L2 | - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 | - RADV_CMD_FLAG_INV_VMEM_L1); + flush_bits &= ~(RADV_CMD_FLAG_INV_L2 | + RADV_CMD_FLAG_WB_L2 | + RADV_CMD_FLAG_INV_VCACHE); } assert(flush_cnt); (*flush_cnt)++; @@ -898,16 +898,16 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, */ if ((cp_coher_cntl || (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2 | - RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) && + RADV_CMD_FLAG_INV_VCACHE | + RADV_CMD_FLAG_INV_L2 | + RADV_CMD_FLAG_WB_L2))) && !is_mec) { radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); radeon_emit(cs, 0); } - if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) || - (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) { + if ((flush_bits & RADV_CMD_FLAG_INV_L2) || + (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) { si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9, cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | @@ -915,7 +915,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8)); cp_coher_cntl = 0; } else { - if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) { + if(flush_bits & RADV_CMD_FLAG_WB_L2) { /* WB = write-back * NC = apply to non-coherent MTYPEs * (i.e. MTYPE <= 1, which is what we use everywhere) @@ -929,7 +929,7 @@ si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, S_0301F0_TC_NC_ACTION_ENA(1)); cp_coher_cntl = 0; } - if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) { + if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) { si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9, cp_coher_cntl | -- 2.30.2