radeonsi: try to hit direct hw MSAA resolve by changing micro mode in clear
author Marek Olšák <marek.olsak@amd.com>
Wed, 8 Jun 2016 19:00:22 +0000 (21:00 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 14 Jun 2016 18:22:16 +0000 (20:22 +0200)
We could also do MSAA resolve in a compute shader like Vulkan and remove
these workarounds.

v2: comment the magic numbers

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeon/r600_texture.c
src/gallium/drivers/radeonsi/si_blit.c

index edfae95ec781128b9df5d0abb87ac0e4209eed0d..57fa9e367a4911c94988f35febac89c0d68f36c0 100644 (file)
@@ -252,6 +252,7 @@ struct r600_texture {
        uint64_t                        dcc_offset; /* 0 = disabled */
        unsigned                        cb_color_info; /* fast clear enable bit */
        unsigned                        color_clear_value[2];
+       unsigned                        last_msaa_resolve_target_micro_mode;
 
        /* Depth buffer compression and fast clear. */
        struct r600_htile_info          htile;
index a1c314ebce0e7571560fb34b62f02819627ab44d..32347f26edd0919986c930d4bf5d1db9100ecf28 100644 (file)
@@ -1012,6 +1012,8 @@ r600_texture_create_object(struct pipe_screen *screen,
         * This must be done after r600_setup_surface.
         * Applies to R600-Cayman. */
        rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
+       /* Applies to GCN. */
+       rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
 
        if (rtex->is_depth) {
                if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
@@ -1808,6 +1810,83 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
                           clear_value, R600_COHERENCY_CB_META);
 }
 
+/* Switch an MSAA texture to the micro tile mode of its last resolve target so
+ * the direct hw resolve path (src and dst modes must match) can be hit. Only
+ * tiling_index[0] and micro_tile_mode change; shared/non-MSAA are left alone.
+ */
+static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
+                                          struct r600_texture *rtex)
+{
+       if (rtex->resource.is_shared ||
+           rtex->surface.nsamples <= 1 ||
+           rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
+               return;
+
+       assert(rtex->surface.level[0].mode == RADEON_SURF_MODE_2D);
+       assert(rtex->surface.last_level == 0);
+
+       /* These magic numbers were copied from addrlib, which has no names
+        * for them either. They are all 2D_TILED_THIN1 modes with different
+        * bpp and micro tile mode. Note: surface.bpe is in bytes, not bits.
+        */
+       if (rscreen->chip_class >= CIK) {
+               switch (rtex->last_msaa_resolve_target_micro_mode) {
+               case 0: /* displayable */
+                       rtex->surface.tiling_index[0] = 10;
+                       break;
+               case 1: /* thin */
+                       rtex->surface.tiling_index[0] = 14;
+                       break;
+               case 3: /* rotated */
+                       rtex->surface.tiling_index[0] = 28;
+                       break;
+               default: /* depth, thick */
+                       assert(!"unexpected micro mode");
+                       return;
+               }
+       } else { /* SI: the tiling index also depends on the element size */
+               switch (rtex->last_msaa_resolve_target_micro_mode) {
+               case 0: /* displayable */
+                       switch (rtex->surface.bpe) {
+                       case 1: /* 8 bpp */
+                               rtex->surface.tiling_index[0] = 10;
+                               break;
+                       case 2: /* 16 bpp */
+                               rtex->surface.tiling_index[0] = 11;
+                               break;
+                       default: /* 4, 8 bytes = 32, 64 bpp */
+                               rtex->surface.tiling_index[0] = 12;
+                               break;
+                       }
+                       break;
+               case 1: /* thin */
+                       switch (rtex->surface.bpe) {
+                       case 1: /* 8 bpp */
+                               rtex->surface.tiling_index[0] = 14;
+                               break;
+                       case 2: /* 16 bpp */
+                               rtex->surface.tiling_index[0] = 15;
+                               break;
+                       case 4: /* 32 bpp */
+                               rtex->surface.tiling_index[0] = 16;
+                               break;
+                       default: /* 8, 16 bytes = 64, 128 bpp */
+                               rtex->surface.tiling_index[0] = 17;
+                               break;
+                       }
+                       break;
+               default: /* depth, thick */
+                       assert(!"unexpected micro mode");
+                       return;
+               }
+       }
+
+       rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
+
+       p_atomic_inc(&rscreen->dirty_fb_counter);
+       p_atomic_inc(&rscreen->dirty_tex_descriptor_counter);
+}
+
 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   struct pipe_framebuffer_state *fb,
                                   struct r600_atom *fb_state,
@@ -1881,6 +1960,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                        if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
                                continue;
 
+                       /* We can change the micro tile mode before a full clear. */
+                       if (rctx->screen->chip_class >= SI)
+                               si_set_optimal_micro_tile_mode(rctx->screen, tex);
+
                        vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
                        vi_dcc_clear_level(rctx, tex, 0, reset_value);
 
@@ -1897,6 +1980,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                continue;
                        }
 
+                       /* We can change the micro tile mode before a full clear. */
+                       if (rctx->screen->chip_class >= SI)
+                               si_set_optimal_micro_tile_mode(rctx->screen, tex);
+
                        /* Do the fast clear. */
                        rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
                                           tex->cmask.offset, tex->cmask.size, 0,
index 9de2c755ebf0fe0a9388756e623d6ef22b587d5a..754b478432c985df0a596b96f61ba91bfa70e7a9 100644 (file)
@@ -22,6 +22,7 @@
  */
 
 #include "si_pipe.h"
+#include "sid.h"
 #include "util/u_format.h"
 #include "util/u_surface.h"
 
@@ -903,8 +904,18 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
            info->src.box.height == dst_height &&
            info->src.box.depth == 1 &&
            dst->surface.level[info->dst.level].mode >= RADEON_SURF_MODE_1D &&
-           src->surface.micro_tile_mode == dst->surface.micro_tile_mode &&
            (!dst->cmask.size || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
+               /* Check the last constraint. */
+               if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
+                       /* The next fast clear will switch to this mode to
+                        * get direct hw resolve next time if the mode is
+                        * different now.
+                        */
+                       src->last_msaa_resolve_target_micro_mode =
+                               dst->surface.micro_tile_mode;
+                       goto resolve_to_temp;
+               }
+
                /* Resolving into a surface with DCC is unsupported. Since
                 * it's being overwritten anyway, clear it to uncompressed.
                 * This is still the fastest codepath even with this clear.
@@ -929,6 +940,7 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
                return true;
        }
 
+resolve_to_temp:
        /* Shader-based resolve is VERY SLOW. Instead, resolve into
         * a temporary texture and blit.
         */
@@ -943,6 +955,12 @@ static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
        templ.flags = R600_RESOURCE_FLAG_FORCE_TILING |
                      R600_RESOURCE_FLAG_DISABLE_DCC;
 
+       /* The src and dst microtile modes must be the same. */
+       if (src->surface.micro_tile_mode == V_009910_ADDR_SURF_DISPLAY_MICRO_TILING)
+               templ.bind = PIPE_BIND_SCANOUT;
+       else
+               templ.bind = 0;
+
        tmp = ctx->screen->resource_create(ctx->screen, &templ);
        if (!tmp)
                return false;