From: Marek Olšák Date: Wed, 6 May 2020 18:12:27 +0000 (-0400) Subject: radeonsi: optimize access pattern for compute blits with linear textures X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b158b117e1ef69d47724f607fb5bd28389148fac;p=mesa.git radeonsi: optimize access pattern for compute blits with linear textures Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index c167a8bc7ee..97ca936f810 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -393,6 +393,8 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u unsigned depth = src_box->depth; enum pipe_format src_format = util_format_linear(src->format); enum pipe_format dst_format = util_format_linear(dst->format); + bool is_linear = ((struct si_texture*)src)->surface.is_linear || + ((struct si_texture*)dst)->surface.is_linear; assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format)); @@ -519,13 +521,20 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u if (!sctx->cs_copy_image) sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx); ctx->bind_compute_state(ctx, sctx->cs_copy_image); - info.block[0] = 8; - info.last_block[0] = width % 8; - info.block[1] = 8; - info.last_block[1] = height % 8; + + /* This is better for access over PCIe. */ + if (is_linear) { + info.block[0] = 64; + info.block[1] = 1; + } else { + info.block[0] = 8; + info.block[1] = 8; + } + info.last_block[0] = width % info.block[0]; + info.last_block[1] = height % info.block[1]; info.block[2] = 1; - info.grid[0] = DIV_ROUND_UP(width, 8); - info.grid[1] = DIV_ROUND_UP(height, 8); + info.grid[0] = DIV_ROUND_UP(width, info.block[0]); + info.grid[1] = DIV_ROUND_UP(height, info.block[1]); info.grid[2] = depth; } diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c index d1a97c210b0..c1a150d6ab3 100644 --- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c +++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c @@ -503,18 +503,16 @@ void *si_create_copy_image_compute_shader(struct pipe_context *ctx) { static const char text[] = "COMP\n" - "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n" - "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n" - "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n" "DCL SV[0], THREAD_ID\n" "DCL SV[1], BLOCK_ID\n" + "DCL SV[2], BLOCK_SIZE\n" "DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" "DCL IMAGE[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n" "DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw "DCL TEMP[0..4], LOCAL\n" - "IMM[0] UINT32 {8, 1, 0, 0}\n" + "MOV TEMP[0].xyz, CONST[0][0].xyzw\n" - "UMAD TEMP[1].xyz, SV[1].xyzz, IMM[0].xxyy, SV[0].xyzz\n" + "UMAD TEMP[1].xyz, SV[1].xyzz, SV[2].xyzz, SV[0].xyzz\n" "UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n" "LOAD TEMP[3], IMAGE[0], TEMP[2].xyzx, 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n" "MOV TEMP[4].xyz, CONST[0][1].xyzw\n"