radv: predicate cmask eliminate when using DCC.
author Dave Airlie <airlied@redhat.com>
Thu, 2 Mar 2017 21:39:10 +0000 (21:39 +0000)
committer Dave Airlie <airlied@redhat.com>
Mon, 17 Jul 2017 00:44:43 +0000 (01:44 +0100)
When using DCC, some clear values don't require a cmask eliminate
step. This patch adds support for black and black with alpha 1;
there are other values, but I don't have access to a comprehensive list.

This works by setting the cmask eliminate predicate when doing the
fast clear, and later, when doing the cmask elimination, making sure
that the draws are predicated.

This increases the fps on Sascha Willems' deferred demo.

Tonga: 580fps->670fps on a Tonga PRO card.
Polaris: 730fps->850fps

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_image.c
src/amd/vulkan/radv_meta_clear.c
src/amd/vulkan/radv_meta_fast_clear.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/si_cmd_buffer.c

index 9c20bb003c4befa6a90331d36267c9a60201c3c5..058770bcbfd10dfaf9d90b41c498c8b0707c8466 100644 (file)
@@ -1117,6 +1117,35 @@ radv_load_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
        radeon_emit(cmd_buffer->cs, 0);
 }
 
+/*
+ * With DCC some colors don't require CMASK elimination before being
+ * used as a texture. This sets a predicate value to determine if the
+ * cmask eliminate is required.
+ */
+void
+radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer,
+                                 struct radv_image *image,
+                                 bool value)
+{
+       uint64_t pred_val = value;
+       uint64_t va = cmd_buffer->device->ws->buffer_get_va(image->bo);
+       va += image->offset + image->dcc_pred_offset;
+
+       if (!image->surface.dcc_size)
+               return;
+
+       cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, image->bo, 8);
+
+       radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 4, 0));
+       radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+                                   S_370_WR_CONFIRM(1) |
+                                   S_370_ENGINE_SEL(V_370_PFP));
+       radeon_emit(cmd_buffer->cs, va);
+       radeon_emit(cmd_buffer->cs, va >> 32);
+       radeon_emit(cmd_buffer->cs, pred_val);
+       radeon_emit(cmd_buffer->cs, pred_val >> 32);
+}
+
 void
 radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
                          struct radv_image *image,
index a8af4fd6d6860f3b6c1643650043190d73720ecc..9e54b95ac3f77a065eb7fe3bb52331e47b3b3892 100644 (file)
@@ -712,12 +712,16 @@ static void
 radv_image_alloc_cmask(struct radv_device *device,
                       struct radv_image *image)
 {
+       uint32_t clear_value_size = 0;
        radv_image_get_cmask_info(device, image, &image->cmask);
 
        image->cmask.offset = align64(image->size, image->cmask.alignment);
        /* + 8 for storing the clear values */
-       image->clear_value_offset = image->cmask.offset + image->cmask.size;
-       image->size = image->cmask.offset + image->cmask.size + 8;
+       if (!image->clear_value_offset) {
+               image->clear_value_offset = image->cmask.offset + image->cmask.size;
+               clear_value_size = 8;
+       }
+       image->size = image->cmask.offset + image->cmask.size + clear_value_size;
        image->alignment = MAX2(image->alignment, image->cmask.alignment);
 }
 
@@ -726,9 +730,10 @@ radv_image_alloc_dcc(struct radv_device *device,
                       struct radv_image *image)
 {
        image->dcc_offset = align64(image->size, image->surface.dcc_alignment);
-       /* + 8 for storing the clear values */
+       /* + 16 for storing the clear values + dcc pred */
        image->clear_value_offset = image->dcc_offset + image->surface.dcc_size;
-       image->size = image->dcc_offset + image->surface.dcc_size + 8;
+       image->dcc_pred_offset = image->clear_value_offset + 8;
+       image->size = image->dcc_offset + image->surface.dcc_size + 16;
        image->alignment = MAX2(image->alignment, image->surface.dcc_alignment);
 }
 
index bf58345245373066c0e6141af02ccf7081ff17f2..353e8382ca19892f5a5e554cfd2a4ea442810947 100644 (file)
@@ -856,6 +856,83 @@ fail:
        return res;
 }
 
+static void vi_get_fast_clear_parameters(VkFormat format,
+                                        const VkClearColorValue *clear_value,
+                                        uint32_t* reset_value,
+                                        bool *can_avoid_fast_clear_elim)
+{
+       bool values[4] = {};
+       int extra_channel;
+       bool main_value = false;
+       bool extra_value = false;
+       int i;
+       *can_avoid_fast_clear_elim = false;
+
+       *reset_value = 0x20202020U;
+
+       const struct vk_format_description *desc = vk_format_description(format);
+       if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
+           format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
+           format == VK_FORMAT_B5G6R5_UNORM_PACK16)
+               extra_channel = -1;
+       else if (desc->layout == VK_FORMAT_LAYOUT_PLAIN) {
+               if (radv_translate_colorswap(format, false) <= 1)
+                       extra_channel = desc->nr_channels - 1;
+               else
+                       extra_channel = 0;
+       } else
+               return;
+
+       for (i = 0; i < 4; i++) {
+               int index = desc->swizzle[i] - VK_SWIZZLE_X;
+               if (desc->swizzle[i] < VK_SWIZZLE_X ||
+                   desc->swizzle[i] > VK_SWIZZLE_W)
+                       continue;
+
+               if (desc->channel[i].pure_integer &&
+                   desc->channel[i].type == VK_FORMAT_TYPE_SIGNED) {
+                       /* Use the maximum value for clamping the clear color. */
+                       int max = u_bit_consecutive(0, desc->channel[i].size - 1);
+
+                       values[i] = clear_value->int32[i] != 0;
+                       if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
+                               return;
+               } else if (desc->channel[i].pure_integer &&
+                          desc->channel[i].type == VK_FORMAT_TYPE_UNSIGNED) {
+                       /* Use the maximum value for clamping the clear color. */
+                       unsigned max = u_bit_consecutive(0, desc->channel[i].size);
+
+                       values[i] = clear_value->uint32[i] != 0U;
+                       if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
+                               return;
+               } else {
+                       values[i] = clear_value->float32[i] != 0.0F;
+                       if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
+                               return;
+               }
+
+               if (index == extra_channel)
+                       extra_value = values[i];
+               else
+                       main_value = values[i];
+       }
+
+       for (int i = 0; i < 4; ++i)
+               if (values[i] != main_value &&
+                   desc->swizzle[i] - VK_SWIZZLE_X != extra_channel &&
+                   desc->swizzle[i] >= VK_SWIZZLE_X &&
+                   desc->swizzle[i] <= VK_SWIZZLE_W)
+                       return;
+
+       *can_avoid_fast_clear_elim = true;
+       if (main_value)
+               *reset_value |= 0x80808080U;
+
+       if (extra_value)
+               *reset_value |= 0x40404040U;
+       return;
+}
+
 static bool
 emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
                      const VkClearAttachment *clear_att,
@@ -930,9 +1007,17 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
                                                RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
        /* clear cmask buffer */
        if (iview->image->surface.dcc_size) {
+               uint32_t reset_value;
+               bool can_avoid_fast_clear_elim;
+               vi_get_fast_clear_parameters(iview->image->vk_format,
+                                            &clear_value, &reset_value,
+                                            &can_avoid_fast_clear_elim);
+
                radv_fill_buffer(cmd_buffer, iview->image->bo,
                                 iview->image->offset + iview->image->dcc_offset,
-                                iview->image->surface.dcc_size, 0x20202020);
+                                iview->image->surface.dcc_size, reset_value);
+               radv_set_dcc_need_cmask_elim_pred(cmd_buffer, iview->image,
+                                                 !can_avoid_fast_clear_elim);
        } else {
 
                if (iview->image->surface.bpe > 8) {
index 94610c469879049fb6916843a1ee572f58a0ec97..27f8c160c068ab0193f88dc2edb4ce70aed72eae 100644 (file)
@@ -334,6 +334,20 @@ emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer,
                                         RADV_CMD_FLAG_FLUSH_AND_INV_CB_META);
 }
 
+static void
+radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
+                                     struct radv_image *image, bool value)
+{
+       uint64_t va = 0;
+
+       if (value) {
+               va = cmd_buffer->device->ws->buffer_get_va(image->bo) + image->offset;
+               va += image->dcc_pred_offset;
+       }
+
+       si_emit_set_predication_state(cmd_buffer, va);
+}
+
 /**
  */
 void
@@ -351,6 +365,10 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
        radv_meta_save_pass(&saved_pass_state, cmd_buffer);
        radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
 
+       if (image->surface.dcc_size) {
+               radv_emit_set_predication_state_from_image(cmd_buffer, image, true);
+               cmd_buffer->state.predicating = true;
+       }
        for (uint32_t layer = 0; layer < layer_count; ++layer) {
                struct radv_image_view iview;
 
@@ -413,6 +431,10 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
                                        &cmd_buffer->pool->alloc);
 
        }
+       if (image->surface.dcc_size) {
+               cmd_buffer->state.predicating = false;
+               radv_emit_set_predication_state_from_image(cmd_buffer, image, false);
+       }
        radv_meta_restore(&saved_state, cmd_buffer);
        radv_meta_restore_pass(&saved_pass_state, cmd_buffer);
 }
index e1fb55654946d7b8b8c5ee5a87564eb92d11cecf..891b34ef138afbd5e8d18ee8b9e376c3ee69835c 100644 (file)
@@ -871,7 +871,7 @@ void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
                            bool is_mec,
                            enum radv_cmd_flush_bits flush_bits);
 void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
-void si_emit_set_pred(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
+void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
                           uint64_t src_va, uint64_t dest_va,
                           uint64_t size);
@@ -914,6 +914,9 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
                               struct radv_image *image,
                               int idx,
                               uint32_t color_values[2]);
+void radv_set_dcc_need_cmask_elim_pred(struct radv_cmd_buffer *cmd_buffer,
+                                      struct radv_image *image,
+                                      bool value);
 void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
                      struct radeon_winsys_bo *bo,
                      uint64_t offset, uint64_t size, uint32_t value);
@@ -1219,6 +1222,7 @@ struct radv_image {
        struct radv_fmask_info fmask;
        struct radv_cmask_info cmask;
        uint32_t clear_value_offset;
+       uint32_t dcc_pred_offset;
 };
 
 /* Whether the image has a htile that is known consistent with the contents of
index ace9e665354a1df6b988b3d0878782dbd6593afe..88616edfa27c5395ccb393658475a3ac55b3ad81 100644 (file)
@@ -1129,8 +1129,9 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
        cmd_buffer->state.flush_bits = 0;
 }
 
+/* sets the CP predication state using a boolean stored at va */
 void
-si_emit_set_pred(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
+si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
 {
        uint32_t val = 0;