*/
uint32_t blocks[3];
+ /**
+ * A starting offset for the grid. If unaligned is set, the offset
+ * must still be aligned.
+ */
+ uint32_t offsets[3];
/**
* Whether it's an unaligned compute dispatch.
*/
bool unaligned;
}
} else {
unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] };
+ unsigned offsets[3] = { info->offsets[0], info->offsets[1], info->offsets[2] };
if (info->unaligned) {
unsigned *cs_block_size = compute_shader->info.cs.block_size;
blocks[0] = round_up_u32(blocks[0], cs_block_size[0]);
blocks[1] = round_up_u32(blocks[1], cs_block_size[1]);
blocks[2] = round_up_u32(blocks[2], cs_block_size[2]);
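+ /* The offsets are given in the same units as the grid; for an unaligned
+ * dispatch that is threads, while COMPUTE_START_* is programmed in
+ * workgroups. Since the struct comment requires the offsets to stay
+ * block-aligned, reduce them to whole workgroups here.
+ */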
+ for (unsigned i = 0; i < 3; ++i) {
+ assert(offsets[i] % cs_block_size[i] == 0);
+ offsets[i] /= cs_block_size[i];
+ }
+
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(cs,
S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
radeon_emit(cs, blocks[2]);
}
+ if (offsets[0] || offsets[1] || offsets[2]) {
+ radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
+ radeon_emit(cs, offsets[0]);
+ radeon_emit(cs, offsets[1]);
+ radeon_emit(cs, offsets[2]);
+
+ /* The blocks in the packet are not counts but end values. */
+ for (unsigned i = 0; i < 3; ++i)
+ blocks[i] += offsets[i];
+ } else {
+ dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
+ }
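+ /* Illustration (hypothetical values): offsets = {2, 0, 0} with
+ * blocks = {4, 1, 1} programs COMPUTE_START_X = 2 and an X dimension of 6
+ * in the DISPATCH_DIRECT packet below, i.e. workgroup IDs 2..5 in X.
+ */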
+
radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, 0) |
PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, blocks[0]);
radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
}
-void radv_CmdDispatch(
+void radv_CmdDispatchBase(
VkCommandBuffer commandBuffer,
+ uint32_t base_x,
+ uint32_t base_y,
+ uint32_t base_z,
uint32_t x,
uint32_t y,
uint32_t z)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_dispatch_info info = {};

info.blocks[0] = x;
info.blocks[1] = y;
info.blocks[2] = z;
+ info.offsets[0] = base_x;
+ info.offsets[1] = base_y;
+ info.offsets[2] = base_z;
radv_dispatch(cmd_buffer, &info);
}
+void radv_CmdDispatch(
+ VkCommandBuffer commandBuffer,
+ uint32_t x,
+ uint32_t y,
+ uint32_t z)
+{
+ radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
+}
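+/* Usage sketch (illustrative, not part of the driver code): an application
+ * dispatching with a base offset would record
+ *
+ *   vkCmdDispatchBase(cmd, 2, 0, 0, 4, 1, 1);
+ *
+ * which launches 4x1x1 workgroups whose gl_WorkGroupID.x runs from 2 to 5;
+ * the Vulkan spec requires the bound compute pipeline to have been created
+ * with the dispatch-base pipeline create flag for a non-zero base.
+ */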
+
void radv_CmdDispatchIndirect(
VkCommandBuffer commandBuffer,
VkBuffer _buffer,