winsys/amdgpu: enable DCC for mipmapped textures
author Marek Olšák <marek.olsak@amd.com>
Fri, 3 Jun 2016 18:48:01 +0000 (20:48 +0200)
committer Marek Olšák <marek.olsak@amd.com>
Tue, 7 Jun 2016 22:22:45 +0000 (00:22 +0200)
Also add dcc_fast_clear_size for clearing only the necessary subset
of DCC. For no AA, it's equal to the size of the whole DCC level.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/gallium/drivers/radeon/r600_texture.c
src/gallium/drivers/radeon/radeon_winsys.h
src/gallium/winsys/amdgpu/drm/amdgpu_surface.c

index 27b464fa509d18b699f6750708f6e9423dc8bdd4..9daad65a04c04496726ca8a7ff6acde547617289 100644 (file)
@@ -931,8 +931,11 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
                        rtex->dcc_offset, rtex->surface.dcc_size,
                        rtex->surface.dcc_alignment);
                for (i = 0; i <= rtex->surface.last_level; i++)
-                       fprintf(f, "  DCCLevel[%i]: offset=%"PRIu64"\n",
-                               i, rtex->surface.level[i].dcc_offset);
+                       fprintf(f, "  DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
+                               "fast_clear_size=%"PRIu64"\n",
+                               i, rtex->surface.level[i].dcc_enabled,
+                               rtex->surface.level[i].dcc_offset,
+                               rtex->surface.level[i].dcc_fast_clear_size);
        }
 
        for (i = 0; i <= rtex->surface.last_level; i++)
@@ -1865,7 +1868,8 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                        vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);
 
                        rctx->clear_buffer(&rctx->b, &tex->resource.b.b,
-                                          tex->dcc_offset, tex->surface.dcc_size,
+                                          tex->dcc_offset,
+                                          tex->surface.level[0].dcc_fast_clear_size,
                                           reset_value, R600_COHERENCY_CB_META);
 
                        if (clear_words_needed)
index d7bb1654a7a3cb01a3a8a35fd0a5bb4af8bf4ec7..c2d1f9ef3ea4718e8dd6b97bcb846717b1d751ab 100644 (file)
@@ -360,6 +360,7 @@ struct radeon_surf_level {
     uint32_t                    pitch_bytes;
     uint32_t                    mode;
     uint64_t                    dcc_offset;
+    uint64_t                    dcc_fast_clear_size;
     bool                        dcc_enabled;
 };
 
index 52b3fa88336fd1b41af6c975bc33f5909036cab5..9f52588c1470a87fadd67d6018eea0739b15a399 100644 (file)
@@ -230,7 +230,9 @@ static int compute_level(struct amdgpu_winsys *ws,
    surf_level->dcc_offset = 0;
    surf_level->dcc_enabled = false;
 
-   if (AddrSurfInfoIn->flags.dccCompatible) {
+   /* The previous level's flag tells us if we can use DCC for this level. */
+   if (AddrSurfInfoIn->flags.dccCompatible &&
+       (level == 0 || AddrDccOut->subLvlCompressible)) {
       AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
       AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
       AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
@@ -243,14 +245,11 @@ static int compute_level(struct amdgpu_winsys *ws,
 
       if (ret == ADDR_OK) {
          surf_level->dcc_offset = surf->dcc_size;
+         surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
          surf_level->dcc_enabled = true;
          surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
          surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);
-      } else {
-         surf->dcc_size = 0;
       }
-   } else {
-      surf->dcc_size = 0;
    }
 
    return 0;
@@ -344,11 +343,19 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
    AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
    AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
    AddrSurfInfoIn.flags.degrade4Space = 1;
+
+   /* DCC notes:
+    * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
+    *   with samples >= 4.
+    * - Mipmapped array textures have low performance (discovered by a closed
+    *   driver team).
+    */
    AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
                                         !(surf->flags & RADEON_SURF_SCANOUT) &&
                                         !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
                                         !compressed && AddrDccIn.numSamples <= 1 &&
-                                        surf->last_level == 0;
+                                        ((surf->array_size == 1 && surf->npix_z == 1) ||
+                                         surf->last_level == 0);
 
    AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
    AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth;
@@ -445,6 +452,16 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
       }
    }
 
+   /* Recalculate the whole DCC miptree size including disabled levels.
+    * This is what addrlib does, but calling addrlib would be a lot more
+    * complicated.
+    */
+   if (surf->dcc_size && surf->last_level > 0) {
+      surf->dcc_size = align64(surf->bo_size >> 8,
+                               ws->info.pipe_interleave_bytes *
+                               ws->info.num_tile_pipes);
+   }
+
    return 0;
 }