X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_texture.c;h=014391142411070aba893afb853e41d406e95ac6;hb=e2e700f6053d0b16ba46e4d5c5b20e965fb2224e;hp=b3052e5cd041a49845e7ba11bf22d1cec2038621;hpb=c164ea86e193c710d41de769ddfb169ab53ced51;p=mesa.git

diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index b3052e5cd04..01439114241 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -27,7 +27,7 @@
 #include "si_pipe.h"
 #include "si_query.h"
 #include "sid.h"
-#include "state_tracker/drm_driver.h"
+#include "frontend/drm_driver.h"
 #include "util/format/u_format.h"
 #include "util/os_time.h"
 #include "util/u_log.h"
@@ -74,6 +74,13 @@ bool si_prepare_for_dma_blit(struct si_context *sctx, struct si_texture *dst, un
    if (vi_dcc_enabled(src, src_level) || vi_dcc_enabled(dst, dst_level))
       return false;
 
+   /* TMZ: mixing encrypted and non-encrypted buffer in a single command
+    * doesn't seem supported.
+    */
+   if ((src->buffer.flags & RADEON_FLAG_ENCRYPTED) !=
+       (dst->buffer.flags & RADEON_FLAG_ENCRYPTED))
+      return false;
+
    /* CMASK as:
     *   src: Both texture and SDMA paths need decompression. Use SDMA.
     *   dst: If overwriting the whole texture, discard CMASK and use
@@ -209,8 +216,8 @@ static unsigned si_texture_get_offset(struct si_screen *sscreen, struct si_textu
 
 static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surface,
                            const struct pipe_resource *ptex, enum radeon_surf_mode array_mode,
-                           unsigned pitch_in_bytes_override, bool is_imported, bool is_scanout,
-                           bool is_flushed_depth, bool tc_compatible_htile)
+                           bool is_imported, bool is_scanout, bool is_flushed_depth,
+                           bool tc_compatible_htile)
 {
    const struct util_format_description *desc = util_format_description(ptex->format);
    bool is_depth, is_stencil;
@@ -250,7 +257,9 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac
    }
 
    if (sscreen->info.chip_class >= GFX8 &&
-       (ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC || ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT ||
+       (ptex->flags & SI_RESOURCE_FLAG_DISABLE_DCC ||
+        (sscreen->info.chip_class < GFX10_3 &&
+         ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT) ||
         (ptex->nr_samples >= 2 && !sscreen->dcc_msaa_allowed)))
       flags |= RADEON_SURF_DISABLE_DCC;
 
@@ -311,26 +320,11 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac
       return r;
    }
 
-   unsigned pitch = pitch_in_bytes_override / bpe;
-
-   if (sscreen->info.chip_class >= GFX9) {
-      if (pitch) {
-         surface->u.gfx9.surf_pitch = pitch;
-         if (ptex->last_level == 0)
-            surface->u.gfx9.surf.epitch = pitch - 1;
-         surface->u.gfx9.surf_slice_size = (uint64_t)pitch * surface->u.gfx9.surf_height * bpe;
-      }
-   } else {
-      if (pitch) {
-         surface->u.legacy.level[0].nblk_x = pitch;
-         surface->u.legacy.level[0].slice_size_dw =
-            ((uint64_t)pitch * surface->u.legacy.level[0].nblk_y * bpe) / 4;
-      }
-   }
    return 0;
 }
 
-void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *tex)
+void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *tex,
+                                   bool *ctx_flushed)
 {
    struct si_screen *sscreen = sctx->screen;
    struct pipe_context *ctx = &sctx->b;
@@ -342,8 +336,14 @@ void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *t
    ctx->flush_resource(ctx, &tex->buffer.b.b);
 
    /* Flush only if any fast clear elimination took place. */
+   bool flushed = false;
    if (n != sctx->num_decompress_calls)
+   {
       ctx->flush(ctx, NULL, 0);
+      flushed = true;
+   }
+   if (ctx_flushed)
+      *ctx_flushed = flushed;
 
    if (ctx == sscreen->aux_context)
       simple_mtx_unlock(&sscreen->aux_context_lock);
@@ -721,9 +721,11 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex
       if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
           (tex->cmask_buffer || tex->surface.dcc_offset)) {
          /* Eliminate fast clear (both CMASK and DCC) */
-         si_eliminate_fast_color_clear(sctx, tex);
-         /* eliminate_fast_color_clear flushes the context */
-         flush = false;
+         bool flushed;
+         si_eliminate_fast_color_clear(sctx, tex, &flushed);
+         /* eliminate_fast_color_clear sometimes flushes the context */
+         if (flushed)
+            flush = false;
 
          /* Disable CMASK if flush_resource isn't going
           * to be called.
@@ -800,13 +802,9 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex
 
 static void si_texture_destroy(struct pipe_screen *screen, struct pipe_resource *ptex)
 {
-   struct si_screen *sscreen = (struct si_screen *)screen;
    struct si_texture *tex = (struct si_texture *)ptex;
    struct si_resource *resource = &tex->buffer;
 
-   if (sscreen->info.chip_class >= GFX9)
-      free(tex->surface.u.gfx9.dcc_retile_map);
-
    si_texture_reference(&tex->flushed_depth_texture, NULL);
 
    if (tex->cmask_buffer != &tex->buffer) {
@@ -974,7 +972,8 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
                                                    const struct pipe_resource *base,
                                                    const struct radeon_surf *surface,
                                                    const struct si_texture *plane0,
-                                                   struct pb_buffer *imported_buf, uint64_t offset,
+                                                   struct pb_buffer *imported_buf,
+                                                   uint64_t offset, unsigned pitch_in_bytes,
                                                    uint64_t alloc_size, unsigned alignment)
 {
    struct si_texture *tex;
@@ -994,7 +993,15 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
    /* don't include stencil-only formats which we don't support for rendering */
    tex->is_depth = util_format_has_depth(util_format_description(tex->buffer.b.b.format));
    tex->surface = *surface;
-   tex->tc_compatible_htile = false; /* This will be enabled on demand. */
+
+   /* On GFX8, HTILE uses different tiling depending on the TC_COMPATIBLE_HTILE
+    * setting, so we have to enable it if we enabled it at allocation.
+    *
+    * GFX9 and later use the same tiling for both, so TC-compatible HTILE can be
+    * enabled on demand.
+    */
+   tex->tc_compatible_htile = sscreen->info.chip_class == GFX8 &&
+                              tex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
 
    /* TC-compatible HTILE:
     * - GFX8 only supports Z32_FLOAT.
@@ -1020,12 +1027,9 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
     */
    tex->ps_draw_ratio = 0;
 
-   if (sscreen->info.chip_class >= GFX9) {
-      tex->surface.u.gfx9.surf_offset = offset;
-   } else {
-      for (unsigned i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
-         tex->surface.u.legacy.level[i].offset += offset;
-   }
+   ac_surface_override_offset_stride(&sscreen->info, &tex->surface,
+                                     tex->buffer.b.b.last_level + 1,
+                                     offset, pitch_in_bytes / tex->surface.bpe);
 
    if (tex->is_depth) {
       if (sscreen->info.chip_class >= GFX9) {
@@ -1155,20 +1159,14 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
           */
          bool use_uint16 = tex->surface.u.gfx9.dcc_retile_use_uint16;
          unsigned num_elements = tex->surface.u.gfx9.dcc_retile_num_elements;
+         unsigned dcc_retile_map_size = num_elements * (use_uint16 ? 2 : 4);
          struct si_resource *buf = si_aligned_buffer_create(screen, 0, PIPE_USAGE_STREAM,
-                                                            num_elements * (use_uint16 ? 2 : 4),
+                                                            dcc_retile_map_size,
                                                             sscreen->info.tcc_cache_line_size);
-         uint32_t *ui = (uint32_t *)sscreen->ws->buffer_map(buf->buf, NULL, PIPE_TRANSFER_WRITE);
-         uint16_t *us = (uint16_t *)ui;
+         void *map = sscreen->ws->buffer_map(buf->buf, NULL, PIPE_TRANSFER_WRITE);
 
-         /* Upload the retile map into a staging buffer. */
-         if (use_uint16) {
-            for (unsigned i = 0; i < num_elements; i++)
-               us[i] = tex->surface.u.gfx9.dcc_retile_map[i];
-         } else {
-            for (unsigned i = 0; i < num_elements; i++)
-               ui[i] = tex->surface.u.gfx9.dcc_retile_map[i];
-         }
+         /* Upload the retile map into the staging buffer. */
+         memcpy(map, tex->surface.u.gfx9.dcc_retile_map, dcc_retile_map_size);
 
          /* Copy the staging buffer to the buffer backing the texture. */
          struct si_context *sctx = (struct si_context *)sscreen->aux_context;
@@ -1210,8 +1208,6 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen,
 
 error:
    FREE(tex);
-   if (sscreen->info.chip_class >= GFX9)
-      free(surface->u.gfx9.dcc_retile_map);
    return NULL;
 }
 
@@ -1229,7 +1225,7 @@ static enum radeon_surf_mode si_choose_tiling(struct si_screen *sscreen,
       return RADEON_SURF_MODE_2D;
 
    /* Transfer resources should be linear. */
-   if (templ->flags & SI_RESOURCE_FLAG_TRANSFER)
+   if (templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR)
       return RADEON_SURF_MODE_LINEAR_ALIGNED;
 
    /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on GFX8,
@@ -1285,7 +1281,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen,
 
    if (templ->nr_samples >= 2) {
       /* This is hackish (overwriting the const pipe_resource template),
-       * but should be harmless and state trackers can also see
+       * but should be harmless and gallium frontends can also see
        * the overriden number of samples in the created pipe_resource.
        */
       if (is_zs && sscreen->eqaa_force_z_samples) {
@@ -1297,8 +1293,8 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen,
       }
    }
 
-   bool is_flushed_depth =
-      templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH || templ->flags & SI_RESOURCE_FLAG_TRANSFER;
+   bool is_flushed_depth = templ->flags & SI_RESOURCE_FLAG_FLUSHED_DEPTH ||
+                           templ->flags & SI_RESOURCE_FLAG_FORCE_LINEAR;
    bool tc_compatible_htile =
       sscreen->info.chip_class >= GFX8 &&
       /* There are issues with TC-compatible HTILE on Tonga (and
@@ -1340,7 +1336,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen,
       if (num_planes > 1)
          plane_templ[i].bind |= PIPE_BIND_SHARED;
 
-      if (si_init_surface(sscreen, &surface[i], &plane_templ[i], tile_mode, 0, false,
+      if (si_init_surface(sscreen, &surface[i], &plane_templ[i], tile_mode, false,
                           plane_templ[i].bind & PIPE_BIND_SCANOUT, is_flushed_depth,
                           tc_compatible_htile))
          return NULL;
@@ -1355,7 +1351,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen,
    for (unsigned i = 0; i < num_planes; i++) {
       struct si_texture *tex =
          si_texture_create_object(screen, &plane_templ[i], &surface[i], plane0, NULL,
-                                  plane_offset[i], total_size, max_alignment);
+                                  plane_offset[i], 0, total_size, max_alignment);
       if (!tex) {
          si_texture_reference(&plane0, NULL);
          return NULL;
@@ -1378,7 +1374,7 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen,
 static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *sscreen,
                                                            const struct pipe_resource *templ,
                                                            struct pb_buffer *buf, unsigned stride,
-                                                           unsigned offset, unsigned usage,
+                                                           uint64_t offset, unsigned usage,
                                                            bool dedicated)
 {
    struct radeon_surf surface = {};
@@ -1418,12 +1414,13 @@ static struct pipe_resource *si_texture_from_winsys_buffer(struct si_screen *ssc
       metadata.mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
    }
 
-   r = si_init_surface(sscreen, &surface, templ, metadata.mode, stride, true,
+   r = si_init_surface(sscreen, &surface, templ, metadata.mode, true,
                        surface.flags & RADEON_SURF_SCANOUT, false, false);
    if (r)
       return NULL;
 
-   tex = si_texture_create_object(&sscreen->b, templ, &surface, NULL, buf, offset, 0, 0);
+   tex = si_texture_create_object(&sscreen->b, templ, &surface, NULL, buf,
+                                  offset, stride, 0, 0);
    if (!tex)
       return NULL;
 
@@ -1549,7 +1546,7 @@ bool si_init_flushed_depth_texture(struct pipe_context *ctx, struct pipe_resourc
  */
 static void si_init_temp_resource_from_box(struct pipe_resource *res, struct pipe_resource *orig,
                                            const struct pipe_box *box, unsigned level,
-                                           unsigned flags)
+                                           unsigned usage, unsigned flags)
 {
    memset(res, 0, sizeof(*res));
    res->format = orig->format;
@@ -1557,10 +1554,10 @@ static void si_init_temp_resource_from_box(struct pipe_resource *res, struct pip
    res->height0 = box->height;
    res->depth0 = 1;
    res->array_size = 1;
-   res->usage = flags & SI_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
+   res->usage = usage;
    res->flags = flags;
 
-   if (flags & SI_RESOURCE_FLAG_TRANSFER && util_format_is_compressed(orig->format)) {
+   if (flags & SI_RESOURCE_FLAG_FORCE_LINEAR && util_format_is_compressed(orig->format)) {
       /* Transfer resources are allocated with linear tiling, which is
        * not supported for compressed formats.
        */
@@ -1626,24 +1623,9 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou
    char *map;
    bool use_staging_texture = false;
 
-   assert(!(texture->flags & SI_RESOURCE_FLAG_TRANSFER));
+   assert(!(texture->flags & SI_RESOURCE_FLAG_FORCE_LINEAR));
    assert(box->width && box->height && box->depth);
 
-   /* If we are uploading into FP16 or R11G11B10_FLOAT via a blit, CB clobbers NaNs,
-    * so in order to preserve them exactly, we have to use the compute blit.
-    * The compute blit is used only when the destination doesn't have DCC, so
-    * disable it here, which is kinda a hack.
-    *
-    * This makes KHR-GL45.texture_view.view_classes pass on gfx9.
-    * gfx10 has the same issue, but the test doesn't use a large enough texture
-    * to enable DCC and fail, so it always passes.
-    */
-   const struct util_format_description *desc = util_format_description(texture->format);
-   if (vi_dcc_enabled(tex, level) &&
-       desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
-       desc->channel[0].size < 32)
-      si_texture_disable_dcc(sctx, tex);
-
    if (tex->is_depth) {
       /* Depth textures use staging unconditionally. */
       use_staging_texture = true;
@@ -1668,7 +1650,7 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou
        * Use the staging texture for uploads if the underlying BO
        * is busy.
        */
-      if (!tex->surface.is_linear)
+      if (!tex->surface.is_linear || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED))
          use_staging_texture = true;
       else if (usage & PIPE_TRANSFER_READ)
          use_staging_texture =
@@ -1695,9 +1677,24 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou
    if (use_staging_texture) {
       struct pipe_resource resource;
       struct si_texture *staging;
+      unsigned bo_usage = usage & PIPE_TRANSFER_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
+      unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR;
 
-      si_init_temp_resource_from_box(&resource, texture, box, level, SI_RESOURCE_FLAG_TRANSFER);
-      resource.usage = (usage & PIPE_TRANSFER_READ) ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
+      /* The pixel shader has a bad access pattern for linear textures.
+       * If a pixel shader is used to blit to/from staging, don't disable caches.
+       *
+       * MSAA, depth/stencil textures, and compressed textures use the pixel shader
+       * to blit.
+       */
+      if (texture->nr_samples <= 1 &&
+          !tex->is_depth &&
+          !util_format_is_compressed(texture->format) &&
+          /* Texture uploads with DCC use the pixel shader to blit */
+          (!(usage & PIPE_TRANSFER_WRITE) || !vi_dcc_enabled(tex, level)))
+         bo_flags |= SI_RESOURCE_FLAG_UNCACHED;
+
+      si_init_temp_resource_from_box(&resource, texture, box, level, bo_usage,
+                                     bo_flags);
 
       /* Since depth-stencil textures don't support linear tiling,
        * blit from ZS to color and vice versa. u_blitter will do