unsigned num_return_elems,
struct arg_info *args,
unsigned max_workgroup_size,
- bool unsafe_math)
+ const struct radv_nir_compiler_options *options)
{
LLVMTypeRef main_function_type, ret_type;
LLVMBasicBlockRef main_function_body;
}
}
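+ /* Tell the backend the value of the high 32 bits used when extending
+  * 32-bit address-space pointers to full 64-bit addresses.
+  */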
+ if (options->address32_hi) {
+ ac_llvm_add_target_dep_function_attr(main_function,
+ "amdgpu-32bit-address-high-bits",
+ options->address32_hi);
+ }
+
if (max_workgroup_size) {
ac_llvm_add_target_dep_function_attr(main_function,
"amdgpu-max-work-group-size",
max_workgroup_size);
}
- if (unsafe_math) {
+ if (options->unsafe_math) {
/* These were copied from some LLVM test. */
LLVMAddTargetDependentFunctionAttr(main_function,
"less-precise-fpmad",
set_loc(ud_info, sgpr_idx, num_sgprs, 0);
}
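+/* Like set_loc_shader(), but for a pointer argument: one user SGPR when
+ * 32-bit pointers are in use, two otherwise. The scratch ring offsets
+ * are the exception and always stay a 64-bit pointer.
+ */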
+static void
+set_loc_shader_ptr(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
+{
+ bool use_32bit_pointers = HAVE_32BIT_POINTERS &&
+ idx != AC_UD_SCRATCH_RING_OFFSETS;
+
+ set_loc_shader(ctx, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
+}
+
static void
set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx,
uint32_t indirect_offset)
&ctx->shader_info->user_sgprs_locs.descriptor_sets[idx];
assert(ud_info);
- set_loc(ud_info, sgpr_idx, 2, indirect_offset);
+ set_loc(ud_info, sgpr_idx, HAVE_32BIT_POINTERS ? 1 : 2, indirect_offset);
}
struct user_sgpr_info {
{
uint8_t count = 0;
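+ /* The vertex buffer descriptor list pointer takes one user SGPR as a
+  * 32-bit pointer, two as a 64-bit one.
+  */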
- count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
+ if (ctx->shader_info->info.vs.has_vertex_buffers)
+ count += HAVE_32BIT_POINTERS ? 1 : 2;
count += ctx->shader_info->info.vs.needs_draw_id ? 3 : 2;
return count;
user_sgpr_info->sgpr_count++;
if (ctx->shader_info->info.loads_push_constants)
- user_sgpr_info->sgpr_count += 2;
+ user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count;
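+ /* Each descriptor set pointer costs one user SGPR with 32-bit pointers,
+  * two otherwise. If they do not all fit, switch to a single pointer to
+  * an indirect table of all sets.
+  */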
- if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
+ if (remaining_sgprs / (HAVE_32BIT_POINTERS ? 1 : 2) <
+     util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
- user_sgpr_info->sgpr_count += 2;
+ user_sgpr_info->sgpr_count += HAVE_32BIT_POINTERS ? 1 : 2;
user_sgpr_info->indirect_all_descriptor_sets = true;
} else {
- user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
+ user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) *
+     (HAVE_32BIT_POINTERS ? 1 : 2);
struct arg_info *args,
LLVMValueRef *desc_sets)
{
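+ /* Descriptor set lists are now 32-bit pointers in the constant address
+  * space, so each one takes a single user SGPR.
+  */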
- LLVMTypeRef type = ac_array_in_const_addr_space(ctx->ac.i8);
+ LLVMTypeRef type = ac_array_in_const32_addr_space(ctx->ac.i8);
unsigned num_sets = ctx->options->layout ?
ctx->options->layout->num_sets : 0;
unsigned stage_mask = 1 << stage;
}
}
} else {
- add_array_arg(args, ac_array_in_const_addr_space(type), desc_sets);
+ add_array_arg(args, ac_array_in_const32_addr_space(type), desc_sets);
}
if (ctx->shader_info->info.loads_push_constants) {
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (ctx->shader_info->info.vs.has_vertex_buffers) {
- add_arg(args, ARG_SGPR, ac_array_in_const_addr_space(ctx->ac.v4i32),
+ add_arg(args, ARG_SGPR,
+ ac_array_in_const32_addr_space(ctx->ac.v4i32),
&ctx->vertex_buffers);
}
add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
ctx->descriptor_sets[i] = NULL;
}
} else {
- set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
- user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
+ user_sgpr_idx);
for (unsigned i = 0; i < num_sets; ++i) {
if ((ctx->shader_info->info.desc_set_used_mask & (1 << i)) &&
}
if (ctx->shader_info->info.loads_push_constants) {
- set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
}
}
(stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (ctx->shader_info->info.vs.has_vertex_buffers) {
- set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS,
- user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_VS_VERTEX_BUFFERS,
+ user_sgpr_idx);
}
unsigned vs_num = 2;
calling_conv = RADEON_LLVM_AMDGPU_GS;
break;
case MESA_SHADER_TESS_CTRL:
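+ /* RADEON_LLVM_AMDGPU_HS exists on every LLVM version the driver still
+  * supports, so the fallback to the VS convention is dead code.
+  */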
- calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS;
+ calling_conv = RADEON_LLVM_AMDGPU_HS;
break;
case MESA_SHADER_FRAGMENT:
calling_conv = RADEON_LLVM_AMDGPU_PS;
ctx->main_function = create_llvm_function(
ctx->context, ctx->ac.module, ctx->ac.builder, NULL, 0, &args,
- ctx->max_workgroup_size,
- ctx->options->unsafe_math);
+ ctx->max_workgroup_size, ctx->options);
set_llvm_calling_convention(ctx->main_function, stage);
user_sgpr_idx = 0;
if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
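+ /* set_loc_shader_ptr() keeps AC_UD_SCRATCH_RING_OFFSETS as a 64-bit
+  * pointer (two SGPRs); see the helper above.
+  */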
- set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS,
- &user_sgpr_idx, 2);
+ set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS,
+ &user_sgpr_idx);
if (ctx->options->supports_spill) {
ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE),
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
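+ /* The descriptor list now lives in the 32-bit const address space;
+  * cast before doing the SGPR load.
+  */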
- list = LLVMBuildPointerCast(builder, list, ac_array_in_const_addr_space(type), "");
+ list = LLVMBuildPointerCast(builder, list,
+ ac_array_in_const32_addr_space(type), "");
return ac_build_load_to_sgpr(&ctx->ac, list, index);
}
prepare_interp_optimize(struct radv_shader_context *ctx,
struct nir_shader *nir)
{
- if (!ctx->options->key.fs.multisample)
- return;
-
bool uses_center = false;
bool uses_centroid = false;
nir_foreach_variable(variable, &nir->inputs) {