radv: implement compressed FMASK texture reads with RADV_PERFTEST=tccompatcmask
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 14 Nov 2018 15:24:02 +0000 (16:24 +0100)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Wed, 19 Jun 2019 08:06:39 +0000 (10:06 +0200)
This allows us to disable the FMASK decompress pass when
transitioning from CB writes to shader reads.

This will likely be improved and enabled by default in the future.

No CTS regressions on GFX8, but a small number of multisample CTS
failures on GFX9 (they look related to the small hint).

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_debug.h
src/amd/vulkan/radv_device.c
src/amd/vulkan/radv_image.c
src/amd/vulkan/radv_meta.h
src/amd/vulkan/radv_meta_fast_clear.c
src/amd/vulkan/radv_private.h

index a26bf6c6a6701354512abde9ddb322871e36a91a..756c97983afced4276c628043980f22dcf90e757 100644 (file)
@@ -1255,6 +1255,15 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
                cb_color_info &= C_028C70_DCC_ENABLE;
        }
 
+       if (radv_image_is_tc_compat_cmask(image) &&
+           (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
+            radv_is_dcc_decompress_pipeline(cmd_buffer))) {
+               /* If this bit is set, the FMASK decompression operation
+                * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS).
+                */
+               cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
+       }
+
        if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
                radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
                radeon_emit(cmd_buffer->cs, cb->cb_color_base);
index aef0139c1b06f3fbc549ac63f0f9215c48fe604b..75e28000e1457693cfa0e2f747e28939df517908 100644 (file)
@@ -62,6 +62,7 @@ enum {
        RADV_PERFTEST_DCC_MSAA       =  0x10,
        RADV_PERFTEST_BO_LIST        =  0x20,
        RADV_PERFTEST_SHADER_BALLOT  =  0x40,
+       RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
 };
 
 bool
index 1d59aaa0b03622fe4a3530743ab14616e32a30aa..4d43f25aee029ab0a7e3af8b1eb2b4b3329efb4b 100644 (file)
@@ -482,6 +482,7 @@ static const struct debug_control radv_perftest_options[] = {
        {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
        {"bolist", RADV_PERFTEST_BO_LIST},
        {"shader_ballot", RADV_PERFTEST_SHADER_BALLOT},
+       {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
        {NULL, 0}
 };
 
@@ -4397,6 +4398,20 @@ radv_initialise_color_surface(struct radv_device *device,
                        unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
                        cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
                }
+
+               if (radv_image_is_tc_compat_cmask(iview->image)) {
+                       /* Allow the texture block to read FMASK directly
+                        * without decompressing it. This bit must be cleared
+                        * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
+                        * otherwise the operation doesn't happen.
+                        */
+                       cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
+
+                       /* Set CMASK into a tiling format that allows the
+                        * texture block to read it.
+                        */
+                       cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
+               }
        }
 
        if (radv_image_has_cmask(iview->image) &&
index 98df24d3546e7f9b55002a704410c9abd2d6e175..4bea09a8a2b8ca7b3005dfee5a0faa7e2b7f9e07 100644 (file)
@@ -219,6 +219,29 @@ radv_use_dcc_for_image(struct radv_device *device,
        return true;
 }
 
+static bool
+radv_use_tc_compat_cmask_for_image(struct radv_device *device,
+                                  struct radv_image *image)
+{
+       if (!(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
+               return false;
+
+       /* TC-compat CMASK is only available for GFX8+. */
+       if (device->physical_device->rad_info.chip_class < GFX8)
+               return false;
+
+       if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
+               return false;
+
+       if (radv_image_has_dcc(image))
+               return false;
+
+       if (!radv_image_has_cmask(image))
+               return false;
+
+       return true;
+}
+
 static void
 radv_prefill_surface_from_metadata(struct radv_device *device,
                                    struct radeon_surf *surface,
@@ -729,11 +752,26 @@ si_make_texture_descriptor(struct radv_device *device,
                                          S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
                        fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->planes[0].surface.u.gfx9.cmask.pipe_aligned) |
                                          S_008F24_META_RB_ALIGNED(image->planes[0].surface.u.gfx9.cmask.rb_aligned);
+
+                       if (radv_image_is_tc_compat_cmask(image)) {
+                               va = gpu_address + image->offset + image->cmask.offset;
+
+                               fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
+                               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
+                               fmask_state[7] |= va >> 8;
+                       }
                } else {
                        fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
                        fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
                                S_008F20_PITCH(image->fmask.pitch_in_pixels - 1);
                        fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
+
+                       if (radv_image_is_tc_compat_cmask(image)) {
+                               va = gpu_address + image->offset + image->cmask.offset;
+
+                               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
+                               fmask_state[7] |= va >> 8;
+                       }
                }
        } else if (fmask_state)
                memset(fmask_state, 0, 8 * 4);
@@ -1122,6 +1160,9 @@ radv_image_create(VkDevice _device,
                /* Try to enable FMASK for multisampled images. */
                if (radv_image_can_enable_fmask(image)) {
                        radv_image_alloc_fmask(device, image);
+
+                       if (radv_use_tc_compat_cmask_for_image(device, image))
+                               image->tc_compatible_cmask = true;
                } else {
                        /* Otherwise, try to enable HTILE for depth surfaces. */
                        if (radv_image_can_enable_htile(image) &&
index d58b08514fe1b39862a970da9ae945b925eab3a9..0cb31b9a7ec788c7dbd958595cddecca025620d8 100644 (file)
@@ -222,6 +222,32 @@ uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
                          struct radv_image *image,
                          const VkImageSubresourceRange *range, uint32_t value);
 
+/**
+ * Return whether the bound pipeline is the FMASK decompress pass.
+ */
+static inline bool
+radv_is_fmask_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
+{
+       struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
+       struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+
+       return radv_pipeline_to_handle(pipeline) ==
+              meta_state->fast_clear_flush.fmask_decompress_pipeline;
+}
+
+/**
+ * Return whether the bound pipeline is the DCC decompress pass.
+ */
+static inline bool
+radv_is_dcc_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
+{
+       struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
+       struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+
+       return radv_pipeline_to_handle(pipeline) ==
+              meta_state->fast_clear_flush.dcc_decompress_pipeline;
+}
+
 /* common nir builder helpers */
 #include "nir/nir_builder.h"
 
index 4398cb7ca598356a0a3a7992ff4fbfd1661aaa19..0e6c7ba72ffca6c58a30135de74b32dccf4374fd 100644 (file)
@@ -646,7 +646,7 @@ radv_process_color_image(struct radv_cmd_buffer *cmd_buffer,
 
        if (decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
                pipeline = &cmd_buffer->device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
-       } else if (radv_image_has_fmask(image)) {
+       } else if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
                pipeline = &cmd_buffer->device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
        } else {
                pipeline = &cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
index 0f20cbe08af2b3a065aecb54c64968602ea0716f..14bf55e9bd34a96a1d20ed95325fc12c0bd8759b 100644 (file)
@@ -1562,6 +1562,7 @@ struct radv_image {
        uint64_t dcc_offset;
        uint64_t htile_offset;
        bool tc_compatible_htile;
+       bool tc_compatible_cmask;
 
        struct radv_fmask_info fmask;
        struct radv_cmask_info cmask;
@@ -1635,6 +1636,15 @@ radv_image_has_dcc(const struct radv_image *image)
        return image->planes[0].surface.dcc_size;
 }
 
+/**
+ * Return whether the image is TC-compatible CMASK.
+ */
+static inline bool
+radv_image_is_tc_compat_cmask(const struct radv_image *image)
+{
+       return radv_image_has_fmask(image) && image->tc_compatible_cmask;
+}
+
 /**
  * Return whether DCC metadata is enabled for a level.
  */