From f425d9ee74ce81be3aa9dfefad572d40c5d42372 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Wed, 7 Nov 2018 22:05:31 +0100 Subject: [PATCH] radv: use LOAD_CONTEXT_REG when loading fast clear values This avoids syncing the Micro Engine. This is only supported for VI+ currently. There is probably a way to use LOAD_CONTEXT_REG on previous chips, but that can be done later. Signed-off-by: Samuel Pitoiset Reviewed-by: Dave Airlie --- src/amd/common/sid.h | 1 + src/amd/vulkan/radv_cmd_buffer.c | 45 ++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h index 5c53133147f..35782046dd5 100644 --- a/src/amd/common/sid.h +++ b/src/amd/common/sid.h @@ -217,6 +217,7 @@ #define PKT3_INCREMENT_CE_COUNTER 0x84 #define PKT3_INCREMENT_DE_COUNTER 0x85 #define PKT3_WAIT_ON_CE_COUNTER 0x86 +#define PKT3_LOAD_CONTEXT_REG 0x9F /* new for VI */ #define PKT_TYPE_S(x) (((unsigned)(x) & 0x3) << 30) #define PKT_TYPE_G(x) (((x) >> 30) & 0x3) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index de67a8a3636..9fd9e81b3c1 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -37,6 +37,8 @@ #include "ac_debug.h" +#include "addrlib/gfx9/chip/gfx9_enum.h" + enum { RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0), RADV_PREFETCH_VS = (1 << 1), @@ -1313,17 +1315,13 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ++reg_count; - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - (reg_count == 2 ? 
COPY_DATA_COUNT_SEL : 0)); + uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset; + + radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); - radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2); - radeon_emit(cs, 0); - - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); - radeon_emit(cs, 0); + radeon_emit(cs, (reg >> 2) - CONTEXT_SPACE_START); + radeon_emit(cs, reg_count); } /* @@ -1443,17 +1441,26 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c; - radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); - radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | - COPY_DATA_DST_SEL(COPY_DATA_REG) | - COPY_DATA_COUNT_SEL); - radeon_emit(cs, va); - radeon_emit(cs, va >> 32); - radeon_emit(cs, reg >> 2); - radeon_emit(cs, 0); + if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) { + radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, (reg >> 2) - CONTEXT_SPACE_START); + radeon_emit(cs, 2); + } else { + /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */ + radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); + radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | + COPY_DATA_DST_SEL(COPY_DATA_REG) | + COPY_DATA_COUNT_SEL); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, reg >> 2); + radeon_emit(cs, 0); - radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); - radeon_emit(cs, 0); + radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); + radeon_emit(cs, 0); + } } static void -- 2.30.2