radv: use LOAD_CONTEXT_REG when loading fast clear values
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 7 Nov 2018 21:05:31 +0000 (22:05 +0100)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 8 Nov 2018 09:41:45 +0000 (10:41 +0100)
This avoids syncing the Micro Engine. This is only supported
on VI+ currently. There is probably a way to use
LOAD_CONTEXT_REG on previous chips, but that can be done later.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/common/sid.h
src/amd/vulkan/radv_cmd_buffer.c

index 5c53133147f728e9e28c7d6c06a8f38f31a9cd94..35782046dd5df034e5d3c34f224dc0d72e4895b4 100644 (file)
 #define PKT3_INCREMENT_CE_COUNTER              0x84
 #define PKT3_INCREMENT_DE_COUNTER              0x85
 #define PKT3_WAIT_ON_CE_COUNTER                0x86
+#define PKT3_LOAD_CONTEXT_REG                  0x9F /* new for VI */
 
 #define PKT_TYPE_S(x)                   (((unsigned)(x) & 0x3) << 30)
 #define PKT_TYPE_G(x)                   (((x) >> 30) & 0x3)
index de67a8a363676fddc86328ca9b7aee78b7c17092..9fd9e81b3c1fa2849d48df7ac4b88ddacc500666 100644 (file)
@@ -37,6 +37,8 @@
 
 #include "ac_debug.h"
 
+#include "addrlib/gfx9/chip/gfx9_enum.h"
+
 enum {
        RADV_PREFETCH_VBO_DESCRIPTORS   = (1 << 0),
        RADV_PREFETCH_VS                = (1 << 1),
@@ -1313,17 +1315,13 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
        if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
                ++reg_count;
 
-       radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
-                       COPY_DATA_DST_SEL(COPY_DATA_REG) |
-                       (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
+       uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
+
+       radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0));
        radeon_emit(cs, va);
        radeon_emit(cs, va >> 32);
-       radeon_emit(cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2);
-       radeon_emit(cs, 0);
-
-       radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
-       radeon_emit(cs, 0);
+       radeon_emit(cs, (reg >> 2) - CONTEXT_SPACE_START);
+       radeon_emit(cs, reg_count);
 }
 
 /*
@@ -1443,17 +1441,26 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 
        uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
 
-       radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
-       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
-                       COPY_DATA_DST_SEL(COPY_DATA_REG) |
-                       COPY_DATA_COUNT_SEL);
-       radeon_emit(cs, va);
-       radeon_emit(cs, va >> 32);
-       radeon_emit(cs, reg >> 2);
-       radeon_emit(cs, 0);
+       if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
+               radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating));
+               radeon_emit(cs, va);
+               radeon_emit(cs, va >> 32);
+               radeon_emit(cs, (reg >> 2) - CONTEXT_SPACE_START);
+               radeon_emit(cs, 2);
+       } else {
+               /* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */
+               radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
+               radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+                               COPY_DATA_DST_SEL(COPY_DATA_REG) |
+                               COPY_DATA_COUNT_SEL);
+               radeon_emit(cs, va);
+               radeon_emit(cs, va >> 32);
+               radeon_emit(cs, reg >> 2);
+               radeon_emit(cs, 0);
 
-       radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
-       radeon_emit(cs, 0);
+               radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
+               radeon_emit(cs, 0);
+       }
 }
 
 static void