radeonsi: add support for displayable DCC for 1 RB chips
author Marek Olšák <marek.olsak@amd.com>
Sat, 5 Jan 2019 00:19:54 +0000 (19:19 -0500)
committer Marek Olšák <marek.olsak@amd.com>
Thu, 4 Apr 2019 13:53:24 +0000 (09:53 -0400)
This is the simpler codepath - just disable RB and pipe alignment for DCC.

src/amd/common/ac_gpu_info.c
src/amd/common/ac_gpu_info.h
src/amd/common/ac_surface.c
src/amd/common/ac_surface.h
src/gallium/drivers/radeon/radeon_winsys.h
src/gallium/drivers/radeonsi/si_texture.c
src/gallium/winsys/amdgpu/drm/amdgpu_bo.c

index fc8c6a09d2f17aafb4258b65c20a54f4a9d87432..a6d249a6d2f1efb0315bd6a1c151b4c40d3d450c 100644 (file)
@@ -503,6 +503,8 @@ void ac_print_gpu_info(struct radeon_info *info)
        printf("    clock_crystal_freq = %i\n", info->clock_crystal_freq);
        printf("    tcc_cache_line_size = %u\n", info->tcc_cache_line_size);
 
+       printf("    use_display_dcc_unaligned = %u\n", info->use_display_dcc_unaligned);
+
        printf("Memory info:\n");
        printf("    pte_fragment_size = %u\n", info->pte_fragment_size);
        printf("    gart_page_size = %u\n", info->gart_page_size);
index b1ef9c53734a3cc73f569f55a94b98b4c19dfd30..99fed5206184bb1be03fbaedad6b1019ab77ac17 100644 (file)
@@ -56,6 +56,9 @@ struct radeon_info {
        uint32_t                    clock_crystal_freq;
        uint32_t                    tcc_cache_line_size;
 
+       /* Disable RB and pipe alignment to skip the retile blit. (1 RB chips only) */
+       bool                        use_display_dcc_unaligned;
+
        /* Memory info. */
        uint32_t                    pte_fragment_size;
        uint32_t                    gart_page_size;
index 27e63c318e6ca85ec3d56d2089b1ad52d67322e0..1f43b6071744522dca704ccb8ed5b5c68a36ddf2 100644 (file)
@@ -478,7 +478,8 @@ static bool get_display_flag(const struct ac_surf_config *config,
        unsigned num_channels = config->info.num_channels;
        unsigned bpe = surf->bpe;
 
-       if (surf->flags & RADEON_SURF_SCANOUT &&
+       if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
+           surf->flags & RADEON_SURF_SCANOUT &&
            config->info.samples <= 1 &&
            surf->blk_w <= 2 && surf->blk_h == 1) {
                /* subsampled */
@@ -1217,7 +1218,7 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 
                        surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
                        surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
-                       surf->u.gfx9.dcc_pitch_max = dout.pitch - 1;
+                       surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
                        surf->dcc_size = dout.dccRamSize;
                        surf->dcc_alignment = dout.dccRamBaseAlign;
                        surf->num_dcc_levels = in->numMipLevels;
@@ -1453,6 +1454,19 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
        AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
        AddrSurfInfoIn.flags.metaRbUnaligned = 0;
 
+       /* The display hardware can only read DCC with RB_ALIGNED=0 and
+        * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
+        *
+        * The CB block requires RB_ALIGNED=1 except on 1 RB chips.
+        * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
+        * after rendering, so PIPE_ALIGNED=1 is recommended.
+        */
+       if (info->use_display_dcc_unaligned && is_color_surface &&
+           AddrSurfInfoIn.flags.display) {
+               AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
+               AddrSurfInfoIn.flags.metaRbUnaligned = 1;
+       }
+
        switch (mode) {
        case RADEON_SURF_MODE_LINEAR_ALIGNED:
                assert(config->info.samples <= 1);
@@ -1525,6 +1539,13 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
                                           surf->bpe * 8, &displayable);
                if (r)
                        return r;
+
+               /* Display needs unaligned DCC. */
+               if (info->use_display_dcc_unaligned &&
+                   surf->num_dcc_levels &&
+                   (surf->u.gfx9.dcc.pipe_aligned ||
+                    surf->u.gfx9.dcc.rb_aligned))
+                       displayable = false;
        }
        surf->is_displayable = displayable;
 
index 7ae166c70a3bb9095654a808d024fa4a9260246a..eb50c37c3c27cb07e0946e689a9782fa328829d6 100644 (file)
@@ -149,7 +149,7 @@ struct gfx9_surf_layout {
     /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
     uint32_t                    offset[RADEON_SURF_MAX_LEVELS];
 
-    uint16_t                    dcc_pitch_max;  /* (mip chain pitch - 1) */
+    uint16_t                    display_dcc_pitch_max;  /* (mip chain pitch - 1) */
 
     uint64_t                    stencil_offset; /* separate stencil */
 };
index 82feef3948745abe3f3a376d8fb0608fcdb0362a..4e53c992fdfa77db291d0ba71f46049938c3fc67 100644 (file)
@@ -217,6 +217,12 @@ struct radeon_bo_metadata {
         struct {
             /* surface flags */
             unsigned swizzle_mode:5;
+
+            /* DCC flags */
+            /* [31:8]: max offset = 4GB - 256; 0 = DCC disabled */
+            unsigned dcc_offset_256B:24;
+            unsigned dcc_pitch_max:14;   /* (mip chain pitch - 1) for DCN */
+            unsigned dcc_independent_64B:1;
         } gfx9;
     } u;
 
index 581f90a7b2f3b5fcfe322f2b42d365d970af451a..cb62f153e5931b6278c375329082ac76f3d94f67 100644 (file)
@@ -37,6 +37,7 @@
 #include <inttypes.h>
 #include "state_tracker/drm_driver.h"
 #include "amd/common/sid.h"
+#include "amd/common/gfx9d.h"
 
 static enum radeon_surf_mode
 si_choose_tiling(struct si_screen *sscreen,
@@ -351,6 +352,11 @@ static void si_get_display_metadata(struct si_screen *sscreen,
                              metadata->u.gfx9.swizzle_mode % 4 == 2;
 
                surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode;
+
+               if (metadata->u.gfx9.dcc_offset_256B) {
+                       surf->u.gfx9.display_dcc_pitch_max = metadata->u.gfx9.dcc_pitch_max;
+                       assert(metadata->u.gfx9.dcc_independent_64B == 1);
+               }
        } else {
                surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
                surf->u.legacy.bankw = metadata->u.legacy.bankw;
@@ -617,6 +623,15 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen,
 
        if (sscreen->info.chip_class >= GFX9) {
                md.u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
+
+               if (tex->dcc_offset && !tex->dcc_separate_buffer) {
+                       uint64_t dcc_offset = tex->dcc_offset;
+
+                       assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
+                       md.u.gfx9.dcc_offset_256B = dcc_offset >> 8;
+                       md.u.gfx9.dcc_pitch_max = tex->surface.u.gfx9.display_dcc_pitch_max;
+                       md.u.gfx9.dcc_independent_64B = 1;
+               }
        } else {
                md.u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
                                           RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
@@ -706,6 +721,23 @@ static void si_get_opaque_metadata(struct si_screen *sscreen,
            md->metadata[1] == si_get_bo_metadata_word1(sscreen) &&
            G_008F28_COMPRESSION_EN(desc[6])) {
                tex->dcc_offset = (uint64_t)desc[7] << 8;
+
+               if (sscreen->info.chip_class >= GFX9) {
+                       /* Fix up parameters for displayable DCC. Some state
+                        * trackers don't set the SCANOUT flag when importing
+                        * displayable images, so we have to recover the correct
+                        * parameters here.
+                        */
+                       tex->surface.u.gfx9.dcc.pipe_aligned =
+                               G_008F24_META_PIPE_ALIGNED(desc[5]);
+                       tex->surface.u.gfx9.dcc.rb_aligned =
+                               G_008F24_META_RB_ALIGNED(desc[5]);
+
+                       /* If DCC is unaligned, this can only be a displayable image. */
+                       if (!tex->surface.u.gfx9.dcc.pipe_aligned &&
+                           !tex->surface.u.gfx9.dcc.rb_aligned)
+                               tex->surface.is_displayable = true;
+               }
                return;
        }
 
@@ -715,6 +747,25 @@ static void si_get_opaque_metadata(struct si_screen *sscreen,
        tex->dcc_offset = 0;
 }
 
+static bool si_has_displayable_dcc(struct si_texture *tex)
+{
+       struct si_screen *sscreen = (struct si_screen*)tex->buffer.b.b.screen;
+
+       if (sscreen->info.chip_class <= VI)
+               return false;
+
+       /* Unaligned DCC is displayable, but needs a cache flush before scanout
+        * (it can't be scanned out and rendered to simultaneously).
+        */
+       if (sscreen->info.use_display_dcc_unaligned &&
+           tex->dcc_offset &&
+           !tex->surface.u.gfx9.dcc.pipe_aligned &&
+           !tex->surface.u.gfx9.dcc.rb_aligned)
+               return true;
+
+       return false;
+}
+
 static boolean si_texture_get_handle(struct pipe_screen* screen,
                                     struct pipe_context *ctx,
                                     struct pipe_resource *resource,
@@ -759,7 +810,10 @@ static boolean si_texture_get_handle(struct pipe_screen* screen,
                 * disable it for external clients that want write
                 * access.
                 */
-               if (usage & PIPE_HANDLE_USAGE_SHADER_WRITE && tex->dcc_offset) {
+               if ((usage & PIPE_HANDLE_USAGE_SHADER_WRITE && tex->dcc_offset) ||
+                   /* Displayable DCC requires an explicit flush. */
+                   (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
+                    si_has_displayable_dcc(tex))) {
                        if (si_texture_disable_dcc(sctx, tex)) {
                                update_metadata = true;
                                /* si_texture_disable_dcc flushes the context */
@@ -1012,7 +1066,7 @@ void si_print_texture_info(struct si_screen *sscreen,
                                "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
                                tex->dcc_offset, tex->surface.dcc_size,
                                tex->surface.dcc_alignment,
-                               tex->surface.u.gfx9.dcc_pitch_max,
+                               tex->surface.u.gfx9.display_dcc_pitch_max,
                                tex->surface.num_dcc_levels);
                }
 
@@ -1199,8 +1253,9 @@ si_texture_create_object(struct pipe_screen *screen,
                 */
                if (tex->surface.dcc_size &&
                    (buf || !(sscreen->debug_flags & DBG(NO_DCC))) &&
-                   !(tex->surface.flags & RADEON_SURF_SCANOUT)) {
-                       /* Reserve space for the DCC buffer. */
+                   (sscreen->info.use_display_dcc_unaligned ||
+                    !(tex->surface.flags & RADEON_SURF_SCANOUT))) {
+                       /* Add space for the DCC buffer. */
                        tex->dcc_offset = align64(tex->size, tex->surface.dcc_alignment);
                        tex->size = tex->dcc_offset + tex->surface.dcc_size;
                }
@@ -1510,6 +1565,17 @@ static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *ssc
 
        si_get_opaque_metadata(sscreen, tex, &metadata);
 
+       /* Displayable DCC requires an explicit flush. */
+       if (dedicated &&
+           !(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
+           si_has_displayable_dcc(tex)) {
+               /* TODO: do we need to decompress DCC? */
+               if (si_texture_discard_dcc(sscreen, tex)) {
+                       /* Update BO metadata after disabling DCC. */
+                       si_set_tex_bo_metadata(sscreen, tex);
+               }
+       }
+
        assert(tex->surface.tile_swizzle == 0);
        return &tex->buffer.b.b;
 }
index 58979bd4ea785c4e5e42052d73f185504e81ad6c..c1863057370f927f6458e5ba85cad5a47e0c1013 100644 (file)
@@ -1226,6 +1226,10 @@ static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
 
    if (bo->ws->info.chip_class >= GFX9) {
       md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
+
+      md->u.gfx9.dcc_offset_256B = AMDGPU_TILING_GET(tiling_flags, DCC_OFFSET_256B);
+      md->u.gfx9.dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);
+      md->u.gfx9.dcc_independent_64B = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
    } else {
       md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
       md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
@@ -1259,6 +1263,10 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
 
    if (bo->ws->info.chip_class >= GFX9) {
       tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
+
+      tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256B);
+      tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
+      tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64B);
    } else {
       if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
          tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */