nir_intrinsic_instr *instr)
{
LLVMValueRef ptr, addr;
+ LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+ unsigned index = nir_intrinsic_base(instr);
- addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
- addr = LLVMBuildAdd(ctx->ac.builder, addr,
- get_src(ctx, instr->src[0]), "");
+ addr = LLVMConstInt(ctx->ac.i32, index, 0);
+ addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
+
+ /* Load constant values from user SGPRS when possible, otherwise
+ * fallback to the default path that loads directly from memory.
+ */
+ if (LLVMIsConstant(src0) &&
+ instr->dest.ssa.bit_size == 32) {
+ unsigned count = instr->dest.ssa.num_components;
+ unsigned offset = index;
+
+ offset += LLVMConstIntGetZExtValue(src0);
+ offset /= 4;
+
+ offset -= ctx->abi->base_inline_push_consts;
+
+ if (offset + count <= ctx->abi->num_inline_push_consts) {
+ return ac_build_gather_values(&ctx->ac,
+ ctx->abi->inline_push_consts + offset,
+ count);
+ }
+ }
ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr);
#define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
+#define AC_MAX_INLINE_PUSH_CONSTS 8 /* size of inline_push_consts[]; upper bound applied to num_inline_push_consts */
+
enum ac_descriptor_type {
AC_DESC_IMAGE,
AC_DESC_FMASK,
/* Vulkan only */
LLVMValueRef push_constants;
+ LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
+ unsigned num_inline_push_consts;
+ unsigned base_inline_push_consts;
LLVMValueRef view_index;
LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
}
}
+/**
+ * Emit \p count 32-bit values into the user SGPRs that the shader of
+ * \p stage has assigned to user-data slot \p idx.
+ *
+ * The location is obtained with radv_lookup_user_sgpr(). Nothing is
+ * emitted when that location reports sgpr_idx == -1. Otherwise the
+ * location is asserted to span exactly \p count SGPRs, and the values are
+ * written as one register sequence starting at
+ * pipeline->user_data_0[stage] + sgpr_idx * 4.
+ */
+static void
+radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer,
+			     struct radv_pipeline *pipeline,
+			     gl_shader_stage stage,
+			     int idx, int count, uint32_t *values)
+{
+	struct radv_userdata_info *sgpr_loc =
+		radv_lookup_user_sgpr(pipeline, stage, idx);
+
+	if (sgpr_loc->sgpr_idx != -1) {
+		uint32_t stage_base = pipeline->user_data_0[stage];
+		uint32_t first_reg = stage_base + sgpr_loc->sgpr_idx * 4;
+
+		/* The caller's count must match what the shader reserved. */
+		assert(sgpr_loc->num_sgprs == count);
+
+		radeon_set_sh_reg_seq(cmd_buffer->cs, first_reg, count);
+		radeon_emit_array(cmd_buffer->cs, values, count);
+	}
+}
+
+
static void
radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline)
radv_get_descriptors_state(cmd_buffer, bind_point);
struct radv_pipeline_layout *layout = pipeline->layout;
struct radv_shader_variant *shader, *prev_shader;
+ bool need_push_constants = false;
unsigned offset;
void *ptr;
uint64_t va;
(!layout->push_constant_size && !layout->dynamic_offset_count))
return;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
- 16 * layout->dynamic_offset_count,
- 256, &offset, &ptr))
- return;
+ radv_foreach_stage(stage, stages) {
+ if (!pipeline->shaders[stage])
+ continue;
- memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
- memcpy((char*)ptr + layout->push_constant_size,
- descriptors_state->dynamic_buffers,
- 16 * layout->dynamic_offset_count);
+ need_push_constants |= pipeline->shaders[stage]->info.info.loads_push_constants;
+ need_push_constants |= pipeline->shaders[stage]->info.info.loads_dynamic_offsets;
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += offset;
+ uint8_t base = pipeline->shaders[stage]->info.info.base_inline_push_consts;
+ uint8_t count = pipeline->shaders[stage]->info.info.num_inline_push_consts;
- MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+ radv_emit_inline_push_consts(cmd_buffer, pipeline, stage,
+ AC_UD_INLINE_PUSH_CONSTANTS,
+ count,
+ (uint32_t *)&cmd_buffer->push_constants[base * 4]);
+ }
- prev_shader = NULL;
- radv_foreach_stage(stage, stages) {
- shader = radv_get_shader(pipeline, stage);
+ if (need_push_constants) {
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
+ 16 * layout->dynamic_offset_count,
+ 256, &offset, &ptr))
+ return;
+
+ memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
+ memcpy((char*)ptr + layout->push_constant_size,
+ descriptors_state->dynamic_buffers,
+ 16 * layout->dynamic_offset_count);
+
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += offset;
+
+ MAYBE_UNUSED unsigned cdw_max =
+ radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+
+ prev_shader = NULL;
+ radv_foreach_stage(stage, stages) {
+ shader = radv_get_shader(pipeline, stage);
- /* Avoid redundantly emitting the address for merged stages. */
- if (shader && shader != prev_shader) {
- radv_emit_userdata_address(cmd_buffer, pipeline, stage,
- AC_UD_PUSH_CONSTANTS, va);
+ /* Avoid redundantly emitting the address for merged stages. */
+ if (shader && shader != prev_shader) {
+ radv_emit_userdata_address(cmd_buffer, pipeline, stage,
+ AC_UD_PUSH_CONSTANTS, va);
- prev_shader = shader;
+ prev_shader = shader;
+ }
}
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
cmd_buffer->push_constant_stages &= ~stages;
- assert(cmd_buffer->cs->cdw <= cdw_max);
}
static void
return count;
}
+/**
+ * Decide how many push constants of the current shader are inlined and
+ * store the result in ctx->shader_info->info.
+ *
+ * Nothing is changed unless the shader uses push constants
+ * (min_push_constant_used != UINT8_MAX), has no indirect push constant
+ * access and only has 32-bit push constants.
+ *
+ * Otherwise num_inline_push_consts becomes the smallest of: the used range
+ * (max_push_constant_used - min_push_constant_used) / 4, the remaining
+ * user SGPRs, and AC_MAX_INLINE_PUSH_CONSTS. base_inline_push_consts
+ * becomes min_push_constant_used / 4.
+ *
+ * \p user_sgpr_info is only read; remaining_sgprs is not updated here.
+ */
+static void allocate_inline_push_consts(struct radv_shader_context *ctx,
+					struct user_sgpr_info *user_sgpr_info)
+{
+	uint8_t free_sgprs = user_sgpr_info->remaining_sgprs;
+
+	/* Inlining needs push constants that are actually used, accessed
+	 * without indirection, and all 32-bit.
+	 */
+	if (ctx->shader_info->info.min_push_constant_used == UINT8_MAX ||
+	    ctx->shader_info->info.has_indirect_push_constants ||
+	    !ctx->shader_info->info.has_only_32bit_push_constants) {
+		return;
+	}
+
+	/* Number of constants covered by the used range. */
+	uint8_t wanted =
+		(ctx->shader_info->info.max_push_constant_used -
+		 ctx->shader_info->info.min_push_constant_used) / 4;
+
+	/* Limit to the user SGPRs still available, then to the hard cap. */
+	uint8_t granted = wanted < free_sgprs ? wanted : free_sgprs;
+	if (granted > AC_MAX_INLINE_PUSH_CONSTS) {
+		granted = AC_MAX_INLINE_PUSH_CONSTS;
+	}
+
+	ctx->shader_info->info.num_inline_push_consts = granted;
+	ctx->shader_info->info.base_inline_push_consts =
+		ctx->shader_info->info.min_push_constant_used / 4;
+
+	/* When every used constant is inlined and the shader loads no
+	 * dynamic offsets, the default push constants path is not needed.
+	 */
+	if (granted == wanted &&
+	    !ctx->shader_info->info.loads_dynamic_offsets) {
+		ctx->shader_info->info.loads_push_constants = false;
+	}
+}
+
+
static void allocate_user_sgprs(struct radv_shader_context *ctx,
gl_shader_stage stage,
bool has_previous_stage,
} else {
user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
}
+
+ allocate_inline_push_consts(ctx, user_sgpr_info);
}
static void
add_arg(args, ARG_SGPR, type, &ctx->abi.push_constants);
}
+ for (unsigned i = 0; i < ctx->shader_info->info.num_inline_push_consts; i++) {
+ add_arg(args, ARG_SGPR, ctx->ac.i32,
+ &ctx->abi.inline_push_consts[i]);
+ }
+ ctx->abi.num_inline_push_consts = ctx->shader_info->info.num_inline_push_consts;
+ ctx->abi.base_inline_push_consts = ctx->shader_info->info.base_inline_push_consts;
+
if (ctx->shader_info->info.so.num_outputs) {
add_arg(args, ARG_SGPR,
ac_array_in_const32_addr_space(ctx->ac.v4i32),
set_loc_shader_ptr(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
}
+ if (ctx->shader_info->info.num_inline_push_consts) {
+ set_loc_shader(ctx, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
+ ctx->shader_info->info.num_inline_push_consts);
+ }
+
if (ctx->streamout_buffers) {
set_loc_shader_ptr(ctx, AC_UD_STREAMOUT_BUFFERS,
user_sgpr_idx);
enum radv_ud_index {
AC_UD_SCRATCH_RING_OFFSETS = 0,
AC_UD_PUSH_CONSTANTS = 1,
- AC_UD_INDIRECT_DESCRIPTOR_SETS = 2,
- AC_UD_VIEW_INDEX = 3,
- AC_UD_STREAMOUT_BUFFERS = 4,
- AC_UD_SHADER_START = 5,
+ AC_UD_INLINE_PUSH_CONSTANTS = 2,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS = 3,
+ AC_UD_VIEW_INDEX = 4,
+ AC_UD_STREAMOUT_BUFFERS = 5,
+ AC_UD_SHADER_START = 6,
AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
AC_UD_VS_BASE_VERTEX_START_INSTANCE,
AC_UD_VS_MAX_UD,
uint8_t max_push_constant_used;
bool has_only_32bit_push_constants;
bool has_indirect_push_constants;
+ uint8_t num_inline_push_consts;
+ uint8_t base_inline_push_consts;
uint32_t desc_set_used_mask;
bool needs_multiview_view_index;
bool uses_invocation_id;