unsigned depth = src_box->depth;
enum pipe_format src_format = util_format_linear(src->format);
enum pipe_format dst_format = util_format_linear(dst->format);
+ bool is_linear = ((struct si_texture*)src)->surface.is_linear ||
+ ((struct si_texture*)dst)->surface.is_linear;
assert(util_format_is_subsampled_422(src_format) == util_format_is_subsampled_422(dst_format));
if (!sctx->cs_copy_image)
sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx);
ctx->bind_compute_state(ctx, sctx->cs_copy_image);
- info.block[0] = 8;
- info.last_block[0] = width % 8;
- info.block[1] = 8;
- info.last_block[1] = height % 8;
+
+ /* If either surface is linear, use a 64x1 block instead of 8x8: this is better for access over PCIe. */
+ if (is_linear) {
+ info.block[0] = 64;
+ info.block[1] = 1;
+ } else {
+ info.block[0] = 8;
+ info.block[1] = 8;
+ }
+ info.last_block[0] = width % info.block[0];
+ info.last_block[1] = height % info.block[1];
info.block[2] = 1;
- info.grid[0] = DIV_ROUND_UP(width, 8);
- info.grid[1] = DIV_ROUND_UP(height, 8);
+ info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
+ info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
info.grid[2] = depth;
}
{
static const char text[] =
"COMP\n"
- "PROPERTY CS_FIXED_BLOCK_WIDTH 8\n"
- "PROPERTY CS_FIXED_BLOCK_HEIGHT 8\n"
- "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
"DCL SV[0], THREAD_ID\n"
"DCL SV[1], BLOCK_ID\n"
+ "DCL SV[2], BLOCK_SIZE\n"
"DCL IMAGE[0], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
"DCL IMAGE[1], 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT, WR\n"
"DCL CONST[0][0..1]\n" // 0:xyzw 1:xyzw
"DCL TEMP[0..4], LOCAL\n"
- "IMM[0] UINT32 {8, 1, 0, 0}\n"
+
"MOV TEMP[0].xyz, CONST[0][0].xyzw\n"
- "UMAD TEMP[1].xyz, SV[1].xyzz, IMM[0].xxyy, SV[0].xyzz\n"
+ "UMAD TEMP[1].xyz, SV[1].xyzz, SV[2].xyzz, SV[0].xyzz\n"
"UADD TEMP[2].xyz, TEMP[1].xyzx, TEMP[0].xyzx\n"
"LOAD TEMP[3], IMAGE[0], TEMP[2].xyzx, 2D_ARRAY, PIPE_FORMAT_R32G32B32A32_FLOAT\n"
"MOV TEMP[4].xyz, CONST[0][1].xyzw\n"