radv: only load needed depth clear regs for fast depth clears
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 15 Nov 2017 14:44:01 +0000 (15:44 +0100)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 20 Nov 2017 09:45:27 +0000 (10:45 +0100)
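This is similar to how the driver sets the depth clear regs after a
fast depth clear: only the regs for the aspects (stencil and/or depth)
that the image format has are loaded. Most of the time, this will copy
a single 32-bit reg instead of two 32-bit regs (64 bits).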

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_cmd_buffer.c

index ae522d2088fdd6c4eee1fa51c7f95cd3d2037ce6..7d86eee97915b34d41d0d512d25269867d4127cb 100644 (file)
@@ -1331,20 +1331,30 @@ static void
 radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
                           struct radv_image *image)
 {
+       VkImageAspectFlags aspects = vk_format_aspects(image->vk_format);
        uint64_t va = radv_buffer_get_va(image->bo);
        va += image->offset + image->clear_value_offset;
+       unsigned reg_offset = 0, reg_count = 0;
 
        if (!image->surface.htile_size)
                return;
 
+       if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+               ++reg_count;
+       } else {
+               ++reg_offset;
+               va += 4;
+       }
+       if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+               ++reg_count;
 
        radeon_emit(cmd_buffer->cs, PKT3(PKT3_COPY_DATA, 4, 0));
        radeon_emit(cmd_buffer->cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
                                    COPY_DATA_DST_SEL(COPY_DATA_REG) |
-                                   COPY_DATA_COUNT_SEL);
+                                   (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
        radeon_emit(cmd_buffer->cs, va);
        radeon_emit(cmd_buffer->cs, va >> 32);
-       radeon_emit(cmd_buffer->cs, R_028028_DB_STENCIL_CLEAR >> 2);
+       radeon_emit(cmd_buffer->cs, (R_028028_DB_STENCIL_CLEAR + 4 * reg_offset) >> 2);
        radeon_emit(cmd_buffer->cs, 0);
 
        radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));