radeonsi: disable the L2 cache for most CPU mappings of textures
author: Marek Olšák <marek.olsak@amd.com>
Wed, 6 May 2020 18:51:50 +0000 (14:51 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 15 May 2020 22:12:35 +0000 (22:12 +0000)
This gives faster blits over PCIe and removes the need to flush L2.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4935>

src/gallium/drivers/radeonsi/si_buffer.c
src/gallium/drivers/radeonsi/si_pipe.h
src/gallium/drivers/radeonsi/si_texture.c

index 72d5b37dccab71deb9fe88c84cc8d54c05fff3c1..a1805ddfc03806ce0102c5de3eb6361c8642dc1a 100644 (file)
@@ -182,6 +182,14 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
    if (res->b.b.flags & SI_RESOURCE_FLAG_32BIT)
       res->flags |= RADEON_FLAG_32BIT;
 
+   /* For higher throughput and lower latency over PCIe assuming sequential access.
+    * Only CP DMA, SDMA, and optimized compute benefit from this.
+    * GFX8 and older don't support RADEON_FLAG_UNCACHED.
+    */
+   if (sscreen->info.chip_class >= GFX9 &&
+       res->b.b.flags & SI_RESOURCE_FLAG_UNCACHED)
+      res->flags |= RADEON_FLAG_UNCACHED;
+
    /* Set expected VRAM and GART usage for the buffer. */
    res->vram_usage = 0;
    res->gart_usage = 0;
index 9bd7ac1c6cb6552945a753f7fddd441afa9aeb33..746b39161ad1d839cf8fecb2459d54d490597943 100644 (file)
    (((x)&0x3) << SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT)
 #define SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(x)                                                    \
    (((x) >> SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT) & 0x3)
+#define SI_RESOURCE_FLAG_UNCACHED          (PIPE_RESOURCE_FLAG_DRV_PRIV << 12)
 
 enum si_clear_code
 {
index e0e59aa37f3c3b0fcad4113d3c4401d743096301..f6f108f3ced6f87856cb3c0c7d53e9d2b3639a9d 100644 (file)
@@ -1694,10 +1694,24 @@ static void *si_texture_transfer_map(struct pipe_context *ctx, struct pipe_resou
    if (use_staging_texture) {
       struct pipe_resource resource;
       struct si_texture *staging;
-
       unsigned bo_usage = usage & PIPE_TRANSFER_READ ? PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
+      unsigned bo_flags = SI_RESOURCE_FLAG_FORCE_LINEAR;
+
+      /* The pixel shader has a bad access pattern for linear textures.
+       * If a pixel shader is used to blit to/from staging, don't disable caches.
+       *
+       * MSAA, depth/stencil textures, and compressed textures use the pixel shader
+       * to blit.
+       */
+      if (texture->nr_samples <= 1 &&
+          !tex->is_depth &&
+          !util_format_is_compressed(texture->format) &&
+          /* Texture uploads with DCC use the pixel shader to blit */
+          (!(usage & PIPE_TRANSFER_WRITE) || !vi_dcc_enabled(tex, level)))
+         bo_flags |= SI_RESOURCE_FLAG_UNCACHED;
+
       si_init_temp_resource_from_box(&resource, texture, box, level, bo_usage,
-                                     SI_RESOURCE_FLAG_FORCE_LINEAR);
+                                     bo_flags);
 
       /* Since depth-stencil textures don't support linear tiling,
        * blit from ZS to color and vice versa. u_blitter will do