radeonsi: optimize access pattern for compute blits with linear textures
author: Marek Olšák <marek.olsak@amd.com>
Wed, 6 May 2020 18:12:27 +0000 (14:12 -0400)
committer: Marge Bot <eric+marge@anholt.net>
Fri, 15 May 2020 22:12:35 +0000 (22:12 +0000)
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4935>

src/gallium/drivers/radeonsi/si_compute_blit.c
src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c

index c167a8bc7ee578c0af53c5003986879a58a03807..97ca936f810b38aaa956e7fbc354217897f9e63f 100644 (file)
@@ -393,6 +393,8 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
    unsigned depth = src_box->depth;
    enum pipe_format src_format = util_format_linear(src->format);
    enum pipe_format dst_format = util_format_linear(dst->format);
+   bool is_linear = ((struct si_texture*)src)->surface.is_linear ||
+                    ((struct si_texture*)dst)->surface.is_linear;
 
    assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format));
 
@@ -519,13 +521,20 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
       if (!sctx->cs_copy_image)
          sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx);
       ctx->bind_compute_state(ctx, sctx->cs_copy_image);
-      info.block[0] = 8;
-      info.last_block[0] = width % 8;
-      info.block[1] = 8;
-      info.last_block[1] = height % 8;
+
+      /* This is better for access over PCIe. */
+      if (is_linear) {
+         info.block[0] = 64;
+         info.block[1] = 1;
+      } else {
+         info.block[0] = 8;
+         info.block[1] = 8;
+      }
+      info.last_block[0] = width % info.block[0];
+      info.last_block[1] = height % info.block[1];
       info.block[2] = 1;
-      info.grid[0] = DIV_ROUND_UP(width, 8);
-      info.grid[1] = DIV_ROUND_UP(height, 8);
+      info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
+      info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
       info.grid[2] = depth;
    }
 
index d1a97c210b0c117c93a1977d32d7d09af3c4a703..c1a150d6ab381d71327c53af331443df0122dbba 100644 (file)
@@ -503,18 +503,16 @@ void *si_create_copy_image_compute_shader(struct pipe_context *ctx)
 {
    static const char text[] =
       "COMP\n"
-      "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
-      "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
-      "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
       "DCL SV[0], THREAD_ID\n"
       "DCL SV[1], BLOCK_ID\n"
+      "DCL SV[2], BLOCK_SIZE\n"
       "DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
       "DCL IMAGE[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
       "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw
       "DCL TEMP[0..4], LOCAL\n"
-      "IMM[0] UINT32 {8, 1, 0, 0}\n"
+
       "MOV TEMP[0].xyz, CONST[0][0].xyzw\n"
-      "UMAD TEMP[1].xyz, SV[1].xyzz, IMM[0].xxyy, SV[0].xyzz\n"
+      "UMAD TEMP[1].xyz, SV[1].xyzz, SV[2].xyzz, SV[0].xyzz\n"
       "UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n"
       "LOAD TEMP[3], IMAGE[0], TEMP[2].xyzx, 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
       "MOV TEMP[4].xyz, CONST[0][1].xyzw\n"